xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d4e9f3b6443ec535fe5e409f785fa3cf019ceb6c)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it it is not scalable (each processor
19 has an order N integer array but is fast to acess.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) - inlined insertion of a
   single (row,col,value) entry into the "diagonal" sequential block aij->A.

   This macro relies on ambient locals declared in the caller
   (MatSetValues_MPIAIJ): rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N,ierr, the
   cached arrays aa,ai,aj,aimax,ailen, the flags nonew,shift, and am = A->m.
   It first narrows the search range by bisection, then scans linearly; an
   existing entry is added to or overwritten according to addv.  If the row
   has no free slot, the whole matrix storage is reallocated with CHUNKSIZE
   extra slots for this row (a,j,i arrays packed into one malloc).

   nonew == 1  : silently drop new nonzero locations
   nonew == -1 : error on any new nonzero location
   nonew == -2 : error on any allocation of new space

   NOTE(review): the bisection compares rp[t] against col while the linear
   scan uses col1 = col - shift; the two agree only when shift == 0 -- confirm
   behavior for the indexshift != 0 case.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
           ierr = PetscFree(a->i);CHKERRQ(ierr); \
           ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
117 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv) - inlined insertion of a
   single (row,col,value) entry into the off-diagonal sequential block
   aij->B.  Mirror image of MatSetValues_SeqAIJ_A_Private, operating on the
   cached B arrays ba,bi,bj,bimax,bilen with bm = B->m; see that macro for
   the search/insert/reallocation scheme and the meaning of nonew.

   NOTE(review): nonew is cached from a->nonew (the A block) in the caller,
   yet governs insertion into B here -- confirm the two blocks are intended
   to share the flag.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
       for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some Variables required in the macro */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (inserting values received from
   other processes), assembles both sequential blocks, coordinates
   disassembly across processes if any process disassembled, and on the
   first final assembly builds the scatter context for MatMult.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive each message of stashed values and insert them locally */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: result is 0 iff some process
       disassembled (its flag is 0) */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first assembly: build lvec/Mvctx scatter for matrix-vector products */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    /* cached MatGetRow workspace is invalid after assembly */
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the (globally numbered) rows in is, optionally placing diag on the
   diagonal of each zeroed row.  Each process may list rows owned by any
   process, so the row indices are first routed to their owners with
   point-to-point messages, then the local rows are zeroed.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = number of rows destined for process j;
     nprocs[2*j+1] = 1 iff a message will be sent to process j */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear search of the ownership ranges for the owner of row idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  /* each incoming message holds at most nmax row indices */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* bucket the row indices by destination (this pass advances starts[]) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (it was consumed above) and fire the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global row numbers to local numbering */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert each diagonal
       entry individually (requires new nonzero locations to be allowed) */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMult_MPIAIJ"
580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583   int        ierr,nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602   int        ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617   int        ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in yy until the next line, which is true for my implementation*/
628   /* but is not perhaps always true. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638   int        ierr;
639 
640   PetscFunctionBegin;
641   /* do nondiagonal part */
642   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
643   /* send it on its way */
644   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   /* do local part */
646   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
647   /* receive remote parts: note this assumes the values are not actually */
648   /* inserted in yy until the next line, which is true for my implementation*/
649   /* but is not perhaps always true. */
650   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*
655   This only works correctly for square matrices where the subblock A->A is the
656    diagonal block
657 */
658 #undef __FUNCT__
659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661 {
662   int        ierr;
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664 
665   PetscFunctionBegin;
666   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667   if (a->rstart != a->cstart || a->rend != a->cend) {
668     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669   }
670   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatScale_MPIAIJ"
676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
683   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatDestroy_MPIAIJ"
689 int MatDestroy_MPIAIJ(Mat mat)
690 {
691   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692   int        ierr;
693 
694   PetscFunctionBegin;
695 #if defined(PETSC_USE_LOG)
696   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697 #endif
698   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
699   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
700   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
702 #if defined (PETSC_USE_CTABLE)
703   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
704 #else
705   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
706 #endif
707   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
708   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
709   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
710   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
711   ierr = PetscFree(aij);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatView_MPIAIJ_Binary"
720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
721 {
722   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
723   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
724   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
725   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
726   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
727   PetscScalar       *column_values;
728 
729   PetscFunctionBegin;
730   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
731   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
732   nz   = A->nz + B->nz;
733   if (rank == 0) {
734     header[0] = MAT_FILE_COOKIE;
735     header[1] = mat->M;
736     header[2] = mat->N;
737     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
738     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
739     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
740     /* get largest number of rows any processor has */
741     rlen = mat->m;
742     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
743     for (i=1; i<size; i++) {
744       rlen = PetscMax(rlen,range[i+1] - range[i]);
745     }
746   } else {
747     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     rlen = mat->m;
749   }
750 
751   /* load up the local row counts */
752   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
753   for (i=0; i<mat->m; i++) {
754     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
755   }
756 
757   /* store the row lengths to the file */
758   if (rank == 0) {
759     MPI_Status status;
760     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
761     for (i=1; i<size; i++) {
762       rlen = range[i+1] - range[i];
763       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
764       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
765     }
766   } else {
767     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
768   }
769   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
770 
771   /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
773   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
774   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
775   cnt  = 0;
776   for (i=0; i<mat->m; i++) {
777     for (j=B->i[i]; j<B->i[i+1]; j++) {
778       if ( (col = garray[B->j[j]]) > cstart) break;
779       column_indices[cnt++] = col;
780     }
781     for (k=A->i[i]; k<A->i[i+1]; k++) {
782       column_indices[cnt++] = A->j[k] + cstart;
783     }
784     for (; j<B->i[i+1]; j++) {
785       column_indices[cnt++] = garray[B->j[j]];
786     }
787   }
788   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
789 
790   /* store the column indices to the file */
791   if (rank == 0) {
792     MPI_Status status;
793     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
794     for (i=1; i<size; i++) {
795       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
796       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
797       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
803   }
804   ierr = PetscFree(column_indices);CHKERRQ(ierr);
805 
806   /* load up the local column values */
807   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
808   cnt  = 0;
809   for (i=0; i<mat->m; i++) {
810     for (j=B->i[i]; j<B->i[i+1]; j++) {
811       if ( garray[B->j[j]] > cstart) break;
812       column_values[cnt++] = B->a[j];
813     }
814     for (k=A->i[i]; k<A->i[i+1]; k++) {
815       column_values[cnt++] = A->a[k];
816     }
817     for (; j<B->i[i+1]; j++) {
818       column_values[cnt++] = B->a[j];
819     }
820   }
821   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
822 
823   /* store the column values to the file */
824   if (rank == 0) {
825     MPI_Status status;
826     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
827     for (i=1; i<size; i++) {
828       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
829       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
830       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
831       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
832     }
833   } else {
834     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
835     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
836   }
837   ierr = PetscFree(column_values);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   Viewer kernel for MPIAIJ matrices; handles ASCII, binary and draw viewers.
   The ASCII "info" formats print only statistics and return early; the binary
   case is delegated to MatView_MPIAIJ_Binary().  For everything else, when
   running on more than one process the whole matrix is gathered onto process
   0 and viewed there through the viewer's singleton.
*/
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics: local nonzeros, allocations and memory use */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                              rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      /* separate counts for the diagonal (A) and off-diagonal (B) blocks */
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* external-package factorization info, only compiled in when available */
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif

      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    /* a null draw context means "draw nothing": return without error */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    /* process 0 gets all M rows; everyone else contributes 0 local rows */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert A's local column indices to global numbering;
       this is undone after the MatSetValues loop below */
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    /* ct keeps the start of the allocation; cols is advanced per row */
    ct   = cols;
    /* translate B's compressed column indices to global numbers via garray */
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatView_MPIAIJ"
964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
965 {
966   int        ierr;
967   PetscTruth isascii,isdraw,issocket,isbinary;
968 
969   PetscFunctionBegin;
970   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
971   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
974   if (isascii || isdraw || isbinary || issocket) {
975     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
976   } else {
977     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 
983 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   Relaxation (SOR) for the parallel AIJ format.  Only the "local" sweep
   variants are supported: each outer iteration scatters the current solution
   into the ghost vector mat->lvec, moves the off-diagonal contribution to the
   right-hand side (bb1 = bb - B*x), and then relaxes with the on-diagonal
   block A alone.  A true global parallel SOR is not implemented.

   matin  - the MPIAIJ matrix
   bb     - right-hand side
   omega  - relaxation parameter
   flag   - MatSORType selecting the sweep and SOR_ZERO_INITIAL_GUESS
   fshift - diagonal shift
   its    - number of outer iterations (must be positive)
   lits   - number of local iterations per outer iteration (must be positive)
   xx     - solution vector; used as initial guess unless zero-guess is set
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;             /* work vector holding bb - B*x */
  PetscScalar  mone=-1.0;       /* used to negate lvec so multadd forms bb - B*x */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x is zero, so bb - B*x == bb; relax directly with bb and count the iteration */
      /* NOTE(review): this branch passes lits twice while the forward/backward
         branches pass lits,PETSC_NULL in the same positions -- confirm intent */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather the ghost (off-process) entries of the current solution */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1061 {
1062   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1063   Mat        A = mat->A,B = mat->B;
1064   int        ierr;
1065   PetscReal  isend[5],irecv[5];
1066 
1067   PetscFunctionBegin;
1068   info->block_size     = 1.0;
1069   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1070   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1071   isend[3] = info->memory;  isend[4] = info->mallocs;
1072   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1073   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1074   isend[3] += info->memory;  isend[4] += info->mallocs;
1075   if (flag == MAT_LOCAL) {
1076     info->nz_used      = isend[0];
1077     info->nz_allocated = isend[1];
1078     info->nz_unneeded  = isend[2];
1079     info->memory       = isend[3];
1080     info->mallocs      = isend[4];
1081   } else if (flag == MAT_GLOBAL_MAX) {
1082     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1083     info->nz_used      = irecv[0];
1084     info->nz_allocated = irecv[1];
1085     info->nz_unneeded  = irecv[2];
1086     info->memory       = irecv[3];
1087     info->mallocs      = irecv[4];
1088   } else if (flag == MAT_GLOBAL_SUM) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   }
1096   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1097   info->fill_ratio_needed = 0;
1098   info->factor_mallocs    = 0;
1099   info->rows_global       = (double)matin->M;
1100   info->columns_global    = (double)matin->N;
1101   info->rows_local        = (double)matin->m;
1102   info->columns_local     = (double)matin->N;
1103 
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatSetOption_MPIAIJ"
1109 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1110 {
1111   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112   int        ierr;
1113 
1114   PetscFunctionBegin;
1115   switch (op) {
1116   case MAT_NO_NEW_NONZERO_LOCATIONS:
1117   case MAT_YES_NEW_NONZERO_LOCATIONS:
1118   case MAT_COLUMNS_UNSORTED:
1119   case MAT_COLUMNS_SORTED:
1120   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1121   case MAT_KEEP_ZEROED_ROWS:
1122   case MAT_NEW_NONZERO_LOCATION_ERR:
1123   case MAT_USE_INODES:
1124   case MAT_DO_NOT_USE_INODES:
1125   case MAT_IGNORE_ZERO_ENTRIES:
1126     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1127     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1128     break;
1129   case MAT_ROW_ORIENTED:
1130     a->roworiented = PETSC_TRUE;
1131     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1132     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1133     break;
1134   case MAT_ROWS_SORTED:
1135   case MAT_ROWS_UNSORTED:
1136   case MAT_YES_NEW_DIAGONALS:
1137     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1138     break;
1139   case MAT_COLUMN_ORIENTED:
1140     a->roworiented = PETSC_FALSE;
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_IGNORE_OFF_PROC_ENTRIES:
1145     a->donotstash = PETSC_TRUE;
1146     break;
1147   case MAT_NO_NEW_DIAGONALS:
1148     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1149   default:
1150     SETERRQ(PETSC_ERR_SUP,"unknown option");
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally owned row of the matrix with globally numbered,
   increasing column indices.  The row is assembled by merging the diagonal
   block's entries (shifted by cstart) into the off-diagonal block's entries
   (mapped through garray), relying on both blocks already being sorted.
   The returned arrays live in mat->rowvalues/rowindices and stay valid until
   MatRestoreRow is called; only one row may be active at a time.

   matin - the matrix
   row   - global row number (must satisfy rstart <= row < rend)
   nz    - number of nonzeros in the row
   idx   - global column indices (pass PETSC_NULL if not wanted)
   v     - values (pass PETSC_NULL if not wanted)
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int     max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation for both values and indices; indices follow the values */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* pass null pointers down for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column is below cstart;
         the merged order is B[<cstart], then all of A, then B[>=cstart] */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1237 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1238 {
1239   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1240 
1241   PetscFunctionBegin;
1242   if (aij->getrowactive == PETSC_FALSE) {
1243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1244   }
1245   aij->getrowactive = PETSC_FALSE;
1246   PetscFunctionReturn(0);
1247 }
1248 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes a matrix norm: NORM_FROBENIUS, NORM_1 (max column sum) or
   NORM_INFINITY (max row sum).  Each process accumulates over its diagonal
   (amat) and off-diagonal (bmat) blocks, then the result is combined with an
   MPI reduction.  shift compensates for the blocks' index base
   (indexshift; presumably 0- vs 1-based storage -- see Mat_SeqAIJ).
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced with MPI_SUM, then sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* builds a full length-N column-sum array on every process
         (not memory-scalable, but simple) */
      PetscReal *tmp,*tmp2;
      int    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* diagonal block: local columns offset by cstart */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* off-diagonal block: compressed columns mapped to global via garray */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so a local max followed by MPI_MAX suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1324 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of A.  If matout is non-null the transpose is returned
   there as a new matrix; otherwise (square matrices only) A is replaced
   in-place via MatHeaderCopy.  The transpose is built by inserting each local
   row of A as a column of B with MatSetValues, so the diagonal block's column
   indices are temporarily shifted to global numbering (and restored after),
   and the off-diagonal block's are translated through garray.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
  int          ierr,shift = Aloc->indexshift;
  int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat          B;
  PetscScalar  *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* note the swapped local and global sizes: B is N x M */
  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert column indices to global numbering; undone below */
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    /* insert row i of A as column `row` of B: rows and columns swapped */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  /* ct keeps the original allocation; cols is advanced per row below */
  ct   = cols;
  /* translate compressed off-diagonal columns to global numbers via garray */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: B's guts replace A's, and B's shell is destroyed */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380 {
1381   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1382   Mat        a = aij->A,b = aij->B;
1383   int        ierr,s1,s2,s3;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1387   if (rr) {
1388     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1389     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390     /* Overlap communication with computation. */
1391     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1392   }
1393   if (ll) {
1394     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1395     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1397   }
1398   /* scale  the diagonal block */
1399   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1400 
1401   if (rr) {
1402     /* Do a scatter end and then right scale the off-diagonal block */
1403     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1404     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1405   }
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1413 int MatPrintHelp_MPIAIJ(Mat A)
1414 {
1415   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1416   int        ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428 {
1429   PetscFunctionBegin;
1430   *bs = 1;
1431   PetscFunctionReturn(0);
1432 }
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1435 int MatSetUnfactored_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatEqual_MPIAIJ"
1447 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1448 {
1449   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1450   Mat        a,b,c,d;
1451   PetscTruth flg;
1452   int        ierr;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1456   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatCopy_MPIAIJ"
1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471 {
1472   int        ierr;
1473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475   PetscTruth flg;
1476 
1477   PetscFunctionBegin;
1478   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1479   if (str != SAME_NONZERO_PATTERN || !flg) {
1480     /* because of the column compression in the off-processor part of the matrix a->B,
1481        the number of columns in a->B and b->B may be different, hence we cannot call
1482        the MatCopy() directly on the two parts. If need be, we can provide a more
1483        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1484        then copying the submatrices */
1485     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1486   } else {
1487     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1488     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1489   }
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1495 int MatSetUpPreallocation_MPIAIJ(Mat A)
1496 {
1497   int        ierr;
1498 
1499   PetscFunctionBegin;
1500   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1505 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1506 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1507 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1508 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1509 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1510 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1511 #endif
1512 
1513 #include "petscblaslapack.h"
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatAXPY_MPIAIJ"
1517 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1518 {
1519   int        ierr,one;
1520   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1521   Mat_SeqAIJ *x,*y;
1522 
1523   PetscFunctionBegin;
1524   if (str == SAME_NONZERO_PATTERN) {
1525     x  = (Mat_SeqAIJ *)xx->A->data;
1526     y  = (Mat_SeqAIJ *)yy->A->data;
1527     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1528     x  = (Mat_SeqAIJ *)xx->B->data;
1529     y  = (Mat_SeqAIJ *)yy->B->data;
1530     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1531   } else {
1532     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 /* -------------------------------------------------------------------*/
/*
   Function (dispatch) table for the MPIAIJ matrix type.  The entries are
   POSITIONAL: each slot corresponds to a fixed member of struct _MatOps
   (declared in the Mat implementation headers -- confirm slot meanings
   there), and a 0 marks an operation not implemented for this format.
   Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
/* the TFS direct solver is only available for real, non-single-precision builds */
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};
1618 
1619 /* ----------------------------------------------------------------------------------------*/
1620 
1621 EXTERN_C_BEGIN
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1624 int MatStoreValues_MPIAIJ(Mat mat)
1625 {
1626   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1627   int        ierr;
1628 
1629   PetscFunctionBegin;
1630   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1631   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 EXTERN_C_BEGIN
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1639 int MatRetrieveValues_MPIAIJ(Mat mat)
1640 {
1641   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1642   int        ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1646   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 EXTERN_C_END
1650 
1651 #include "petscpc.h"
1652 EXTERN_C_BEGIN
1653 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1654 EXTERN_C_END
1655 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   Constructor for the MATMPIAIJ type: allocates and zeros the Mat_MPIAIJ
   data structure, installs the operations table, computes the parallel
   row and column ownership ranges, creates the stash used to buffer
   off-processor entries, and composes the type-specific store/retrieve
   values and get-diagonal-block functions on the object.  The local A
   (diagonal) and B (off-diagonal) blocks are created later, at
   preallocation time.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in whichever of the local/global sizes were left as PETSC_DECIDE/PETSC_DETERMINE */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners and cowners
     share a single allocation (cowners begins size+2 ints in) */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered local row counts into global ownership offsets */
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply (created lazily elsewhere) */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1735 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates an MPIAIJ matrix: creates a new matrix with the same
   parallel layout, copies the ownership information, the column map
   (colmap), the global-column array (garray), the multiply scatter
   context, and duplicates the local A and B blocks (values copied or
   not according to cpvalues).  Assumes the input matrix is assembled
   (it uses oldmat->lvec and oldmat->Mvctx, which exist only after
   assembly).
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;   /* the duplicate is produced fully assembled */
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  /* copy the parallel layout bookkeeping */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners live in one allocation of 2*(size+2) ints */
  ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    /* without CTABLE the colmap is a dense array of length N (global columns) */
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    /* garray has one entry per (local) column of the off-diagonal block B */
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
1798 
1799 #include "petscsys.h"
1800 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer.  Process 0 reads
   the header, the per-row nonzero counts, the column indices, and the
   numerical values from the file, and ships each other process its
   share via MPI point-to-point messages; every process then counts its
   diagonal/off-diagonal nonzeros, preallocates, and assembles its local
   rows with MatSetValues().
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
#if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; everything else arrives by message */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* header = {cookie, M, N, nz}; broadcast the sizes to everyone */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: rows split as evenly as possible,
     with the first M%size processes getting one extra row */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum local counts into global row offsets */
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share
     one allocation (offlens starts rend-rstart ints in) */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* send arguments are ignored on non-root ranks */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries
     (columns outside [cstart,cend)) so preallocation can be exact */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily reduce ourlens to the diagonal-block
     counts for preallocation, then restore the totals for insertion */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* NOTE(review): this assumes the file stores each row's column indices sorted -- confirm against the binary format */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out (A->tag keeps these messages
       distinct from the earlier column-index messages on the viewer tag) */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) {
    if (size == 1) {
      ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
    } else {
      ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
    }
  }
#endif
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END
2002 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Extracts the parallel submatrix given by (isrow, iscol), returning a
  new MPIAIJ matrix with csize local columns (PETSC_DECIDE lets each
  process choose).  On MAT_REUSE_MATRIX the sequential submatrix cached
  on the output matrix (composed as "SubMatrix") is refilled instead of
  recreated.

    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* keep the matrix, free only the length-1 array that held it */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* split the columns evenly, first n%size ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* running sum of local column counts gives this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the sequential submatrix rows into the parallel matrix */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2126 
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2129 /*@C
2130    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2131    (the default parallel PETSc format).  For good matrix assembly performance
2132    the user should preallocate the matrix storage by setting the parameters
2133    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2134    performance can be increased by more than a factor of 50.
2135 
2136    Collective on MPI_Comm
2137 
2138    Input Parameters:
2139 +  A - the matrix
2140 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2141            (same value is used for all local rows)
2142 .  d_nnz - array containing the number of nonzeros in the various rows of the
2143            DIAGONAL portion of the local submatrix (possibly different for each row)
2144            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2145            The size of this array is equal to the number of local rows, i.e 'm'.
2146            You must leave room for the diagonal entry even if it is zero.
2147 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2148            submatrix (same value is used for all local rows).
2149 -  o_nnz - array containing the number of nonzeros in the various rows of the
2150            OFF-DIAGONAL portion of the local submatrix (possibly different for
2151            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2152            structure. The size of this array is equal to the number
2153            of local rows, i.e 'm'.
2154 
2155    The AIJ format (also called the Yale sparse matrix format or
2156    compressed row storage), is fully compatible with standard Fortran 77
2157    storage.  That is, the stored row and column indices can begin at
2158    either one (as in Fortran) or zero.  See the users manual for details.
2159 
2160    The user MUST specify either the local or global matrix dimensions
2161    (possibly both).
2162 
2163    The parallel matrix is partitioned such that the first m0 rows belong to
2164    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2165    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2166 
2167    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2169    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2170    first row that belongs to the processor, and r2 is the last row belonging
2171    to the this processor. This is a square mxm matrix. The remaining portion
2172    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2173 
2174    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2175 
2176    By default, this format uses inodes (identical nodes) when possible.
2177    We search for consecutive rows with the same nonzero structure, thereby
2178    reusing matrix information to achieve increased efficiency.
2179 
2180    Options Database Keys:
2181 +  -mat_aij_no_inode  - Do not use inodes
2182 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2183 -  -mat_aij_oneindex - Internally use indexing starting at 1
2184         rather than 0.  Note that when calling MatSetValues(),
2185         the user still MUST index entries starting at 0!
2186 
2187    Example usage:
2188 
2189    Consider the following 8x8 matrix with 34 non-zero values, that is
2190    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2191    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2192    as follows:
2193 
2194 .vb
2195             1  2  0  |  0  3  0  |  0  4
2196     Proc0   0  5  6  |  7  0  0  |  8  0
2197             9  0 10  | 11  0  0  | 12  0
2198     -------------------------------------
2199            13  0 14  | 15 16 17  |  0  0
2200     Proc1   0 18  0  | 19 20 21  |  0  0
2201             0  0  0  | 22 23  0  | 24  0
2202     -------------------------------------
2203     Proc2  25 26 27  |  0  0 28  | 29  0
2204            30  0  0  | 31 32 33  |  0 34
2205 .ve
2206 
2207    This can be represented as a collection of submatrices as:
2208 
2209 .vb
2210       A B C
2211       D E F
2212       G H I
2213 .ve
2214 
2215    Where the submatrices A,B,C are owned by proc0, D,E,F are
2216    owned by proc1, G,H,I are owned by proc2.
2217 
2218    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2219    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2220    The 'M','N' parameters are 8,8, and have the same values on all procs.
2221 
2222    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2223    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2224    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2225    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2226    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2228 
2229    When d_nz, o_nz parameters are specified, d_nz storage elements are
2230    allocated for every row of the local diagonal submatrix, and o_nz
2231    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2232    One way to choose d_nz and o_nz is to use the max nonzerors per local
2233    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2234    In this case, the values of d_nz,o_nz are:
2235 .vb
2236      proc0 : dnz = 2, o_nz = 2
2237      proc1 : dnz = 3, o_nz = 2
2238      proc2 : dnz = 1, o_nz = 4
2239 .ve
2240    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2241    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
2243    34 values.
2244 
2245    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2247    In the above case the values for d_nnz,o_nnz are:
2248 .vb
2249      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2250      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2251      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2252 .ve
2253    Here the space allocated is sum of all the above values i.e 34, and
2254    hence pre-allocation is perfect.
2255 
2256    Level: intermediate
2257 
2258 .keywords: matrix, aij, compressed row, sparse, parallel
2259 
2260 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2261 @*/
2262 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2263 {
2264   Mat_MPIAIJ   *b;
2265   int          ierr,i;
2266   PetscTruth   flg2;
2267 
2268   PetscFunctionBegin;
2269   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2270   if (!flg2) PetscFunctionReturn(0);
2271   B->preallocated = PETSC_TRUE;
2272   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2273   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2274   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2275   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2276   if (d_nnz) {
2277     for (i=0; i<B->m; i++) {
2278       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2279     }
2280   }
2281   if (o_nnz) {
2282     for (i=0; i<B->m; i++) {
2283       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2284     }
2285   }
2286   b = (Mat_MPIAIJ*)B->data;
2287 
2288   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2289   PetscLogObjectParent(B,b->A);
2290   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2291   PetscLogObjectParent(B,b->B);
2292 
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatCreateMPIAIJ"
2298 /*@C
2299    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2300    (the default parallel PETSc format).  For good matrix assembly performance
2301    the user should preallocate the matrix storage by setting the parameters
2302    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2303    performance can be increased by more than a factor of 50.
2304 
2305    Collective on MPI_Comm
2306 
2307    Input Parameters:
2308 +  comm - MPI communicator
2309 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2310            This value should be the same as the local size used in creating the
2311            y vector for the matrix-vector product y = Ax.
2312 .  n - This value should be the same as the local size used in creating the
2313        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2314        calculated if N is given) For square matrices n is almost always m.
2315 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2316 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2317 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2318            (same value is used for all local rows)
2319 .  d_nnz - array containing the number of nonzeros in the various rows of the
2320            DIAGONAL portion of the local submatrix (possibly different for each row)
2321            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2322            The size of this array is equal to the number of local rows, i.e 'm'.
2323            You must leave room for the diagonal entry even if it is zero.
2324 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2325            submatrix (same value is used for all local rows).
2326 -  o_nnz - array containing the number of nonzeros in the various rows of the
2327            OFF-DIAGONAL portion of the local submatrix (possibly different for
2328            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2329            structure. The size of this array is equal to the number
2330            of local rows, i.e 'm'.
2331 
2332    Output Parameter:
2333 .  A - the matrix
2334 
2335    Notes:
2336    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2337    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2338    storage requirements for this matrix.
2339 
2340    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2341    processor than it must be used on all processors that share the object for
2342    that argument.
2343 
2344    The AIJ format (also called the Yale sparse matrix format or
2345    compressed row storage), is fully compatible with standard Fortran 77
2346    storage.  That is, the stored row and column indices can begin at
2347    either one (as in Fortran) or zero.  See the users manual for details.
2348 
2349    The user MUST specify either the local or global matrix dimensions
2350    (possibly both).
2351 
2352    The parallel matrix is partitioned such that the first m0 rows belong to
2353    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2354    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2355 
2356    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2358    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2359    first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
2361    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2362 
2363    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2364 
2365    By default, this format uses inodes (identical nodes) when possible.
2366    We search for consecutive rows with the same nonzero structure, thereby
2367    reusing matrix information to achieve increased efficiency.
2368 
2369    Options Database Keys:
2370 +  -mat_aij_no_inode  - Do not use inodes
2371 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2372 -  -mat_aij_oneindex - Internally use indexing starting at 1
2373         rather than 0.  Note that when calling MatSetValues(),
2374         the user still MUST index entries starting at 0!
2375 
2376 
2377    Example usage:
2378 
2379    Consider the following 8x8 matrix with 34 non-zero values, that is
2380    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2381    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2382    as follows:
2383 
2384 .vb
2385             1  2  0  |  0  3  0  |  0  4
2386     Proc0   0  5  6  |  7  0  0  |  8  0
2387             9  0 10  | 11  0  0  | 12  0
2388     -------------------------------------
2389            13  0 14  | 15 16 17  |  0  0
2390     Proc1   0 18  0  | 19 20 21  |  0  0
2391             0  0  0  | 22 23  0  | 24  0
2392     -------------------------------------
2393     Proc2  25 26 27  |  0  0 28  | 29  0
2394            30  0  0  | 31 32 33  |  0 34
2395 .ve
2396 
2397    This can be represented as a collection of submatrices as:
2398 
2399 .vb
2400       A B C
2401       D E F
2402       G H I
2403 .ve
2404 
2405    Where the submatrices A,B,C are owned by proc0, D,E,F are
2406    owned by proc1, G,H,I are owned by proc2.
2407 
2408    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2409    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2410    The 'M','N' parameters are 8,8, and have the same values on all procs.
2411 
2412    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2413    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2414    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2415    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2418 
2419    When d_nz, o_nz parameters are specified, d_nz storage elements are
2420    allocated for every row of the local diagonal submatrix, and o_nz
2421    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2423    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2424    In this case, the values of d_nz,o_nz are:
2425 .vb
2426      proc0 : dnz = 2, o_nz = 2
2427      proc1 : dnz = 3, o_nz = 2
2428      proc2 : dnz = 1, o_nz = 4
2429 .ve
2430    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2431    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2433    34 values.
2434 
2435    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2437    In the above case the values for d_nnz,o_nnz are:
2438 .vb
2439      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2440      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2441      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2442 .ve
2443    Here the space allocated is sum of all the above values i.e 34, and
2444    hence pre-allocation is perfect.
2445 
2446    Level: intermediate
2447 
2448 .keywords: matrix, aij, compressed row, sparse, parallel
2449 
2450 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2451 @*/
2452 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2453 {
2454   int ierr,size;
2455 
2456   PetscFunctionBegin;
2457   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2459   if (size > 1) {
2460     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2461     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2462   } else {
2463     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2464     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2471 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2472 {
2473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2474   PetscFunctionBegin;
2475   *Ad     = a->A;
2476   *Ao     = a->B;
2477   *colmap = a->garray;
2478   PetscFunctionReturn(0);
2479 }
2480 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Attaches a coloring to both the diagonal (a->A) and
   off-diagonal (a->B) sequential blocks of an MPIAIJ matrix.

   Two coloring types are handled:
     IS_COLORING_LOCAL   - colors are indexed by global column, so the full
                           color array must be gathered from all processes
                           before the off-diagonal entries can be extracted.
     IS_COLORING_GHOSTED - colors are indexed by local (ghosted) numbering, so
                           global column indices are first translated to local
                           ones via A->mapping.
*/
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the global color array,
       then pick out the entries for this process's ghost columns
       (garray maps local off-diagonal column -> global column) */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* colors is not freed here; presumably ISColoringCreate takes ownership
       of the array -- TODO confirm against ISColoringCreate's contract */
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate this process's owned
       global columns [cstart, cstart + a->A->n) to local indices, then look
       up each one's color */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: same translation, starting from
       the ghost columns' global indices stored in garray */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2546 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2547 {
2548   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2549   int        ierr;
2550 
2551   PetscFunctionBegin;
2552   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2553   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2559 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2560 {
2561   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2562   int        ierr;
2563 
2564   PetscFunctionBegin;
2565   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2566   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 #undef __FUNCT__
2571 #define __FUNCT__ "MatMerge"
2572 /*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
2574                  matrices from each processor
2575 
2576     Collective on MPI_Comm
2577 
2578    Input Parameters:
+    comm - the communicator the parallel matrix will live on
2580 -    inmat - the input sequential matrices
2581 
2582    Output Parameter:
2583 .    outmat - the parallel matrix generated
2584 
2585     Level: advanced
2586 
   Notes: The number of columns in EACH of the separate input matrices
2588       MUST be the same.
2589 
2590 @*/
int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
{
  int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar *values;
  PetscMap    columnmap,rowmap;

  PetscFunctionBegin;
  /* NOTE(review): inmat is destroyed near the end of this routine (see the
     MatDestroy() below); callers must not use it afterwards. */

  /* sizes of this process's sequential input matrix */
  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
  /* temporary maps are used only to compute (a) this process's share n of the
     n global columns and (b) the global index rstart of its first row */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  /* first pass over inmat: record the per-row diagonal (dnz) and
     off-diagonal (onz) nonzero counts so the result can be preallocated */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* second pass: copy local row i of inmat into global row i+rstart */
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
2635 
2636 #undef __FUNCT__
2637 #define __FUNCT__ "MatFileSplit"
2638 int MatFileSplit(Mat A,char *outfile)
2639 {
2640   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2641   PetscViewer out;
2642   char        *name;
2643   Mat         B;
2644   PetscScalar *values;
2645 
2646   PetscFunctionBegin;
2647 
2648   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2649   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2650   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2651   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2652   for (i=0;i<m;i++) {
2653     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2654     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2655     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2656   }
2657   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2658   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2659 
2660   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2661   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2662   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2663   sprintf(name,"%s.%d",outfile,rank);
2664   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2665   ierr = PetscFree(name);
2666   ierr = MatView(B,out);CHKERRQ(ierr);
2667   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2668   ierr = MatDestroy(B);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671