xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8229bfc2e272e22dd52541101af2fe6fdaaa77c5)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array) but is fast to access.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
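
/*
   Illustrative sketch (not compiled): the inverse lookup of the map built above,
   as it is used later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().  The
   local indices are stored shifted by one so that a lookup result of zero can
   mean "this global column does not occur in the off-diagonal part B".

       int gcol,lcol;                        gcol = global column, lcol = local column in B
   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                               a zero from the table becomes -1 here
   #else
       lcol = aij->colmap[gcol] - 1;
   #endif
       if (lcol < 0) { handle a column that is not (yet) present in B }
*/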
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
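
/*
   Worked example of the reallocation branch above (values illustrative only,
   index shift ignored).  Suppose am = 3 rows, ai = {0,2,4,6}, and row 1 has
   nrow == rmax so a new entry does not fit.  With CHUNKSIZE = 15:

       new_nz = ai[3] + CHUNKSIZE = 6 + 15 = 21
       new_i  = {0, 2, 4+15, 6+15} = {0, 2, 19, 21}

   Rows up to and including row 1 keep their starting offsets; every later row
   pointer is shifted by CHUNKSIZE, leaving a gap of CHUNKSIZE unused slots at
   the end of row 1 that absorbs this insertion and future ones.
   MatSetValues_SeqAIJ_B_Private below is identical except that it operates on
   the off-diagonal block B.
*/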
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required in the macros below */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
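
/*
   Summary sketch of how the routine above routes a logically global entry
   (i,j) with value v; indices are global and [rstart,rend), [cstart,cend) are
   this process's row and column ownership ranges:

       if (rstart <= i && i < rend) {                    locally owned row
         if (cstart <= j && j < cend)  -> aij->A at (i-rstart, j-cstart)
         else                          -> aij->B at (i-rstart, local column found
                                          via garray/colmap, possibly after
                                          disassembling an already assembled B)
       } else                          -> mat->stash, shipped to the owning
                                          process during MatAssemblyBegin/End
*/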
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
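
/*
   Worked example of the MPI_BOR consistency check above, assuming the usual
   encoding INSERT_VALUES = 1 and ADD_VALUES = 2 (an assumption made only for
   this illustration):

       rank 0 called MatSetValues with INSERT_VALUES : contributes 1
       rank 1 called MatSetValues with ADD_VALUES    : contributes 2
       MPI_Allreduce with MPI_BOR                    : 1|2 = 3 = (ADD_VALUES|INSERT_VALUES)

   so a mix of modes is detected and reported; if every rank used the same mode
   the OR simply reproduces that mode, which is then adopted by any rank that
   set no values at all.
*/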
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358   PetscTruth  flag;
359 #endif
360 
361   PetscFunctionBegin;
362   if (!aij->donotstash) {
363     while (1) {
364       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
365       if (!flg) break;
366 
367       for (i=0; i<n;) {
368         /* Now identify the consecutive vals belonging to the same row */
369         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370         if (j < n) ncols = j-i;
371         else       ncols = n-i;
372         /* Now assemble all these values with a single function call */
373         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
374         i = j;
375       }
376     }
377     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
378   }
379 
380   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
381   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
382 
383   /* determine if any processor has disassembled; if so, we must
384      also disassemble ourselves so that we can reassemble. */
385   /*
386      if the nonzero structure of submatrix B cannot change then we know that
387      no processor disassembled, and thus we can skip this step
388   */
389   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
390     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
391     if (mat->was_assembled && !other_disassembled) {
392       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
393     }
394   }
395 
396   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
398   }
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 #if defined(PETSC_HAVE_SUPERLUDIST)
407   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
408   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
409 #endif
410 
411 #if defined(PETSC_HAVE_SPOOLES)
412   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
413   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
414 #endif
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatZeroRows_MPIAIJ"
433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
434 {
435   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
436   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
437   int            *nprocs,j,idx,nsends,row;
438   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
439   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
440   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
441   MPI_Comm       comm = A->comm;
442   MPI_Request    *send_waits,*recv_waits;
443   MPI_Status     recv_status,*send_status;
444   IS             istmp;
445   PetscTruth     found;
446 
447   PetscFunctionBegin;
448   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
449   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
450 
451   /* first count the number of contributors to each processor */
452   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
453   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
454   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
455   for (i=0; i<N; i++) {
456     idx = rows[i];
457     found = PETSC_FALSE;
458     for (j=0; j<size; j++) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
461       }
462     }
463     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
464   }
465   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
466 
467   /* inform other processors of number of messages and max length*/
468   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
469 
470   /* post receives:   */
471   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
472   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
473   for (i=0; i<nrecvs; i++) {
474     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
475   }
476 
477   /* do sends:
478       1) starts[i] gives the starting index in svalues for stuff going to
479          the ith processor
480   */
481   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
482   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
483   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
484   starts[0] = 0;
485   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486   for (i=0; i<N; i++) {
487     svalues[starts[owner[i]]++] = rows[i];
488   }
489   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
490 
491   starts[0] = 0;
492   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493   count = 0;
494   for (i=0; i<size; i++) {
495     if (nprocs[2*i+1]) {
496       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
497     }
498   }
499   ierr = PetscFree(starts);CHKERRQ(ierr);
500 
501   base = owners[rank];
502 
503   /*  wait on receives */
504   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
505   source = lens + nrecvs;
506   count  = nrecvs; slen = 0;
507   while (count) {
508     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
509     /* unpack receives into our local space */
510     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
511     source[imdex]  = recv_status.MPI_SOURCE;
512     lens[imdex]    = n;
513     slen          += n;
514     count--;
515   }
516   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
517 
518   /* move the data into the send scatter */
519   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
520   count = 0;
521   for (i=0; i<nrecvs; i++) {
522     values = rvalues + i*nmax;
523     for (j=0; j<lens[i]; j++) {
524       lrows[count++] = values[j] - base;
525     }
526   }
527   ierr = PetscFree(rvalues);CHKERRQ(ierr);
528   ierr = PetscFree(lens);CHKERRQ(ierr);
529   ierr = PetscFree(owner);CHKERRQ(ierr);
530   ierr = PetscFree(nprocs);CHKERRQ(ierr);
531 
532   /* actually zap the local rows */
533   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
534   PetscLogObjectParent(A,istmp);
535 
536   /*
537         Zero the required rows. If the "diagonal block" of the matrix
538      is square and the user wishes to set the diagonal, we use separate
539      code so that MatSetValues() is not called for each diagonal entry,
540      which would allocate new memory each time and slow things down with many mallocs.
541 
542        Contributed by: Matthew Knepley
543   */
544   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
545   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
546   if (diag && (l->A->M == l->A->N)) {
547     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
548   } else if (diag) {
549     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
550     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
551       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
553     }
554     for (i = 0; i < slen; i++) {
555       row  = lrows[i] + rstart;
556       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
557     }
558     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560   } else {
561     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
562   }
563   ierr = ISDestroy(istmp);CHKERRQ(ierr);
564   ierr = PetscFree(lrows);CHKERRQ(ierr);
565 
566   /* wait on sends */
567   if (nsends) {
568     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
569     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
570     ierr = PetscFree(send_status);CHKERRQ(ierr);
571   }
572   ierr = PetscFree(send_waits);CHKERRQ(ierr);
573   ierr = PetscFree(svalues);CHKERRQ(ierr);
574 
575   PetscFunctionReturn(0);
576 }
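
/*
   Sketch of the communication bookkeeping used above.  For each process j:

       nprocs[2*j]   = number of rows this process must send to process j
       nprocs[2*j+1] = 1 if anything is sent to process j, 0 otherwise
       nsends        = sum of the flags = number of MPI_Isend() calls issued
       PetscMaxSum() = tells every process nmax (the longest message it might
                       receive) and nrecvs (how many messages it will receive),
                       so nrecvs receives of length nmax can be posted up front

   The received global row numbers are shifted by base (= owners[rank]) into
   local numbering before the local MatZeroRows()/MatSetValues() calls.
*/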
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMult_MPIAIJ"
580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583   int        ierr,nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
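
/*
   The multiply above computes, for the locally owned rows,

       y_local = A_diag * x_local + B_offdiag * lvec

   where lvec holds the ghost (off-process) entries of x gathered by the Mvctx
   scatter.  VecScatterBegin() is posted before the local product so the
   communication for lvec overlaps the computation of A_diag * x_local.
*/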
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602   int        ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617   int        ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in yy until the VecScatterEnd() below, which is true for this */
628   /* implementation but may not always be true. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
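
/*
   The transpose multiply above computes

       y = A_diag^T * x_local + (sum over processes of B_offdiag^T * x_local)

   where each process forms its contribution B_offdiag^T * x_local in lvec and
   the reverse scatter with ADD_VALUES accumulates those contributions into the
   owning process's portion of y.
*/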
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638   int        ierr;
639 
640   PetscFunctionBegin;
641   /* do nondiagonal part */
642   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
643   /* send it on its way */
644   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   /* do local part */
646   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
647   /* receive remote parts: note this assumes the values are not actually */
648   /* inserted in zz until the VecScatterEnd() below, which is true for this */
649   /* implementation but may not always be true. */
650   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*
655   This only works correctly for square matrices where the subblock A->A is the
656    diagonal block
657 */
658 #undef __FUNCT__
659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661 {
662   int        ierr;
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664 
665   PetscFunctionBegin;
666   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667   if (a->rstart != a->cstart || a->rend != a->cend) {
668     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669   }
670   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatScale_MPIAIJ"
676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
683   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatDestroy_MPIAIJ"
689 int MatDestroy_MPIAIJ(Mat mat)
690 {
691   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692   int        ierr;
693 
694   PetscFunctionBegin;
695 #if defined(PETSC_USE_LOG)
696   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697 #endif
698   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
699   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
700   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
702 #if defined (PETSC_USE_CTABLE)
703   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
704 #else
705   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
706 #endif
707   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
708   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
709   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
710   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
711   ierr = PetscFree(aij);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatView_MPIAIJ_Binary"
720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
721 {
722   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
723   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
724   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
725   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
726   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
727   PetscScalar       *column_values;
728 
729   PetscFunctionBegin;
730   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
731   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
732   nz   = A->nz + B->nz;
733   if (rank == 0) {
734     header[0] = MAT_FILE_COOKIE;
735     header[1] = mat->M;
736     header[2] = mat->N;
737     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
738     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
739     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
740     /* get largest number of rows any processor has */
741     rlen = mat->m;
742     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
743     for (i=1; i<size; i++) {
744       rlen = PetscMax(rlen,range[i+1] - range[i]);
745     }
746   } else {
747     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     rlen = mat->m;
749   }
750 
751   /* load up the local row counts */
752   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
753   for (i=0; i<mat->m; i++) {
754     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
755   }
756 
757   /* store the row lengths to the file */
758   if (rank == 0) {
759     MPI_Status status;
760     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
761     for (i=1; i<size; i++) {
762       rlen = range[i+1] - range[i];
763       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
764       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
765     }
766   } else {
767     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
768   }
769   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
770 
771   /* load up the local column indices */
772   nzmax = nz; /* the 0th processor needs as much space as the largest any processor needs */
773   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
774   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
775   cnt  = 0;
776   for (i=0; i<mat->m; i++) {
777     for (j=B->i[i]; j<B->i[i+1]; j++) {
778       if ( (col = garray[B->j[j]]) > cstart) break;
779       column_indices[cnt++] = col;
780     }
781     for (k=A->i[i]; k<A->i[i+1]; k++) {
782       column_indices[cnt++] = A->j[k] + cstart;
783     }
784     for (; j<B->i[i+1]; j++) {
785       column_indices[cnt++] = garray[B->j[j]];
786     }
787   }
788   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
789 
790   /* store the column indices to the file */
791   if (rank == 0) {
792     MPI_Status status;
793     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
794     for (i=1; i<size; i++) {
795       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
796       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
797       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
803   }
804   ierr = PetscFree(column_indices);CHKERRQ(ierr);
805 
806   /* load up the local column values */
807   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
808   cnt  = 0;
809   for (i=0; i<mat->m; i++) {
810     for (j=B->i[i]; j<B->i[i+1]; j++) {
811       if ( garray[B->j[j]] > cstart) break;
812       column_values[cnt++] = B->a[j];
813     }
814     for (k=A->i[i]; k<A->i[i+1]; k++) {
815       column_values[cnt++] = A->a[k];
816     }
817     for (; j<B->i[i+1]; j++) {
818       column_values[cnt++] = B->a[j];
819     }
820   }
821   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
822 
823   /* store the column values to the file */
824   if (rank == 0) {
825     MPI_Status status;
826     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
827     for (i=1; i<size; i++) {
828       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
829       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
830       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
831       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
832     }
833   } else {
834     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
835     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
836   }
837   ierr = PetscFree(column_values);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
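
/*
   Layout of the binary file produced above (all quantities are global):

       int    header[4] = { MAT_FILE_COOKIE, M, N, total number of nonzeros }
       int    rowlen[M]      nonzeros per row, in global row order
       int    colidx[nz]     global column indices, increasing within each row
       Scalar vals[nz]       the matching numerical values

   Process 0 writes its own data first and then receives and writes each other
   process's row lengths, indices, and values in rank order, so the file is
   independent of the number of processes used to write it.
*/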
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
843 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
844 {
845   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
846   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
847   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
848   PetscTruth        isdraw,isascii,flg,isbinary;
849   PetscViewer       sviewer;
850   PetscViewerFormat format;
851 
852   PetscFunctionBegin;
853   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
856   if (isascii) {
857     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
858     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
859       MatInfo info;
860       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
861       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
862       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
863       if (flg) {
864         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
865 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
866       } else {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
868 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
869       }
870       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
871       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
872       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
873       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
874       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
875       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
876       PetscFunctionReturn(0);
877     } else if (format == PETSC_VIEWER_ASCII_INFO) {
878       PetscFunctionReturn(0);
879     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
881       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
882 #endif
883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
884       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
885 #endif
886 
887       PetscFunctionReturn(0);
888     }
889   } else if (isbinary) {
890     if (size == 1) {
891       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
892       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
893     } else {
894       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
895     }
896     PetscFunctionReturn(0);
897   } else if (isdraw) {
898     PetscDraw  draw;
899     PetscTruth isnull;
900     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
901     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
902   }
903 
904   if (size == 1) {
905     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
906     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
907   } else {
908     /* assemble the entire matrix onto first processor. */
909     Mat         A;
910     Mat_SeqAIJ *Aloc;
911     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
912     PetscScalar *a;
913 
914     if (!rank) {
915       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
916     } else {
917       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
918     }
919     PetscLogObjectParent(mat,A);
920 
921     /* copy over the A part */
922     Aloc = (Mat_SeqAIJ*)aij->A->data;
923     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
924     row = aij->rstart;
925     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
926     for (i=0; i<m; i++) {
927       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
928       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
929     }
930     aj = Aloc->j;
931     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
932 
933     /* copy over the B part */
934     Aloc = (Mat_SeqAIJ*)aij->B->data;
935     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
936     row  = aij->rstart;
937     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
938     ct   = cols;
939     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
940     for (i=0; i<m; i++) {
941       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
942       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
943     }
944     ierr = PetscFree(ct);CHKERRQ(ierr);
945     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
946     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
947     /*
948        Everyone has to make this call to draw the matrix since the graphics waits are
949        synchronized across all processors that share the PetscDraw object
950     */
951     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
952     if (!rank) {
953       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
954       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
955     }
956     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
957     ierr = MatDestroy(A);CHKERRQ(ierr);
958   }
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatView_MPIAIJ"
964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
965 {
966   int        ierr;
967   PetscTruth isascii,isdraw,issocket,isbinary;
968 
969   PetscFunctionBegin;
970   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
971   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
974   if (isascii || isdraw || isbinary || issocket) {
975     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
976   } else {
977     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatRelax_MPIAIJ"
986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
987 {
988   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
989   int          ierr;
990   Vec          bb1;
991   PetscScalar  mone=-1.0;
992 
993   PetscFunctionBegin;
994   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
995 
996   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
997 
998   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
999     if (flag & SOR_ZERO_INITIAL_GUESS) {
1000       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1001       its--;
1002     }
1003 
1004     while (its--) {
1005       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1006       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1007 
1008       /* update rhs: bb1 = bb - B*x */
1009       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1010       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1011 
1012       /* local sweep */
1013       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1014       CHKERRQ(ierr);
1015     }
1016   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1017     if (flag & SOR_ZERO_INITIAL_GUESS) {
1018       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1019       its--;
1020     }
1021     while (its--) {
1022       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1023       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1024 
1025       /* update rhs: bb1 = bb - B*x */
1026       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1027       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1028 
1029       /* local sweep */
1030       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1031       CHKERRQ(ierr);
1032     }
1033   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1034     if (flag & SOR_ZERO_INITIAL_GUESS) {
1035       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1036       its--;
1037     }
1038     while (its--) {
1039       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1040       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041 
1042       /* update rhs: bb1 = bb - B*x */
1043       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1044       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1045 
1046       /* local sweep */
1047       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1048       CHKERRQ(ierr);
1049     }
1050   } else {
1051     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1052   }
1053 
1054   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
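
/*
   Sketch of one outer iteration of the local relaxation above (the SOR_LOCAL_*
   variants all follow the same pattern, differing only in the sweep passed to
   the diagonal block):

       gather ghost values of xx into lvec           (VecScatterBegin/End)
       bb1 = bb - B_offdiag * lvec                   (lvec scaled by -1, then multadd)
       xx  = SOR(A_diag, bb1, omega, lits sweeps)    (relaxation on the diagonal block only)

   so coupling to off-process unknowns enters only through the right-hand side
   update; this is processor-block SOR, not a true global SOR.
*/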
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1061 {
1062   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1063   Mat        A = mat->A,B = mat->B;
1064   int        ierr;
1065   PetscReal  isend[5],irecv[5];
1066 
1067   PetscFunctionBegin;
1068   info->block_size     = 1.0;
1069   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1070   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1071   isend[3] = info->memory;  isend[4] = info->mallocs;
1072   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1073   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1074   isend[3] += info->memory;  isend[4] += info->mallocs;
1075   if (flag == MAT_LOCAL) {
1076     info->nz_used      = isend[0];
1077     info->nz_allocated = isend[1];
1078     info->nz_unneeded  = isend[2];
1079     info->memory       = isend[3];
1080     info->mallocs      = isend[4];
1081   } else if (flag == MAT_GLOBAL_MAX) {
1082     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1083     info->nz_used      = irecv[0];
1084     info->nz_allocated = irecv[1];
1085     info->nz_unneeded  = irecv[2];
1086     info->memory       = irecv[3];
1087     info->mallocs      = irecv[4];
1088   } else if (flag == MAT_GLOBAL_SUM) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   }
1096   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1097   info->fill_ratio_needed = 0;
1098   info->factor_mallocs    = 0;
1099   info->rows_global       = (double)matin->M;
1100   info->columns_global    = (double)matin->N;
1101   info->rows_local        = (double)matin->m;
1102   info->columns_local     = (double)matin->n;
1103 
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatSetOption_MPIAIJ"
1109 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1110 {
1111   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112   int        ierr;
1113 
1114   PetscFunctionBegin;
1115   switch (op) {
1116   case MAT_NO_NEW_NONZERO_LOCATIONS:
1117   case MAT_YES_NEW_NONZERO_LOCATIONS:
1118   case MAT_COLUMNS_UNSORTED:
1119   case MAT_COLUMNS_SORTED:
1120   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1121   case MAT_KEEP_ZEROED_ROWS:
1122   case MAT_NEW_NONZERO_LOCATION_ERR:
1123   case MAT_USE_INODES:
1124   case MAT_DO_NOT_USE_INODES:
1125   case MAT_IGNORE_ZERO_ENTRIES:
1126     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1127     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1128     break;
1129   case MAT_ROW_ORIENTED:
1130     a->roworiented = PETSC_TRUE;
1131     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1132     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1133     break;
1134   case MAT_ROWS_SORTED:
1135   case MAT_ROWS_UNSORTED:
1136   case MAT_YES_NEW_DIAGONALS:
1137     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1138     break;
1139   case MAT_COLUMN_ORIENTED:
1140     a->roworiented = PETSC_FALSE;
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_IGNORE_OFF_PROC_ENTRIES:
1145     a->donotstash = PETSC_TRUE;
1146     break;
1147   case MAT_NO_NEW_DIAGONALS:
1148     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1149   default:
1150     SETERRQ(PETSC_ERR_SUP,"unknown option");
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatGetRow_MPIAIJ"
1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1158 {
1159   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1160   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1161   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1162   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1163   int          *cmap,*idx_p;
1164 
1165   PetscFunctionBegin;
1166   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1167   mat->getrowactive = PETSC_TRUE;
1168 
1169   if (!mat->rowvalues && (idx || v)) {
1170     /*
1171         allocate enough space to hold information from the longest row.
1172     */
1173     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1174     int     max = 1,tmp;
1175     for (i=0; i<matin->m; i++) {
1176       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1177       if (max < tmp) { max = tmp; }
1178     }
1179     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1180     mat->rowindices = (int*)(mat->rowvalues + max);
1181   }
1182 
1183   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1184   lrow = row - rstart;
1185 
1186   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187   if (!v)   {pvA = 0; pvB = 0;}
1188   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1189   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1190   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1191   nztot = nzA + nzB;
1192 
1193   cmap  = mat->garray;
1194   if (v  || idx) {
1195     if (nztot) {
1196       /* Sort by increasing column numbers, assuming A and B already sorted */
1197       int imark = -1;
1198       if (v) {
1199         *v = v_p = mat->rowvalues;
1200         for (i=0; i<nzB; i++) {
1201           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1202           else break;
1203         }
1204         imark = i;
1205         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1206         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1207       }
1208       if (idx) {
1209         *idx = idx_p = mat->rowindices;
1210         if (imark > -1) {
1211           for (i=0; i<imark; i++) {
1212             idx_p[i] = cmap[cworkB[i]];
1213           }
1214         } else {
1215           for (i=0; i<nzB; i++) {
1216             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1217             else break;
1218           }
1219           imark = i;
1220         }
1221         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1222         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1223       }
1224     } else {
1225       if (idx) *idx = 0;
1226       if (v)   *v   = 0;
1227     }
1228   }
1229   *nz = nztot;
1230   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1231   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
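
/*
   Sketch of how the routine above merges a full global row.  The off-diagonal
   block B stores columns in a local numbering and garray[] maps them back to
   global columns; since both blocks keep their rows sorted, the merged row is

       [ B entries with garray[col] < cstart ]   columns left of the diagonal block
       [ A entries shifted by +cstart        ]   columns inside the diagonal block
       [ remaining B entries                 ]   columns right of the diagonal block

   which is the row in increasing global column order.
*/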
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1237 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1238 {
1239   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1240 
1241   PetscFunctionBegin;
1242   if (aij->getrowactive == PETSC_FALSE) {
1243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1244   }
1245   aij->getrowactive = PETSC_FALSE;
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatNorm_MPIAIJ"
1251 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1252 {
1253   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1254   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1255   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1256   PetscReal    sum = 0.0;
1257   PetscScalar  *v;
1258 
1259   PetscFunctionBegin;
1260   if (aij->size == 1) {
1261     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1262   } else {
1263     if (type == NORM_FROBENIUS) {
1264       v = amat->a;
1265       for (i=0; i<amat->nz; i++) {
1266 #if defined(PETSC_USE_COMPLEX)
1267         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1268 #else
1269         sum += (*v)*(*v); v++;
1270 #endif
1271       }
1272       v = bmat->a;
1273       for (i=0; i<bmat->nz; i++) {
1274 #if defined(PETSC_USE_COMPLEX)
1275         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1276 #else
1277         sum += (*v)*(*v); v++;
1278 #endif
1279       }
1280       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1281       *norm = sqrt(*norm);
1282     } else if (type == NORM_1) { /* max column norm */
1283       PetscReal *tmp,*tmp2;
1284       int    *jj,*garray = aij->garray;
1285       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1286       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1287       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1288       *norm = 0.0;
1289       v = amat->a; jj = amat->j;
1290       for (j=0; j<amat->nz; j++) {
1291         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1292       }
1293       v = bmat->a; jj = bmat->j;
1294       for (j=0; j<bmat->nz; j++) {
1295         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1296       }
1297       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1298       for (j=0; j<mat->N; j++) {
1299         if (tmp2[j] > *norm) *norm = tmp2[j];
1300       }
1301       ierr = PetscFree(tmp);CHKERRQ(ierr);
1302       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1303     } else if (type == NORM_INFINITY) { /* max row norm */
1304       PetscReal ntemp = 0.0;
1305       for (j=0; j<aij->A->m; j++) {
1306         v = amat->a + amat->i[j] + shift;
1307         sum = 0.0;
1308         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1309           sum += PetscAbsScalar(*v); v++;
1310         }
1311         v = bmat->a + bmat->i[j] + shift;
1312         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1313           sum += PetscAbsScalar(*v); v++;
1314         }
1315         if (sum > ntemp) ntemp = sum;
1316       }
1317       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1318     } else {
1319       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
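
/*
   Summary of the norms computed above, with a_ij denoting the global entries:

       NORM_FROBENIUS : sqrt( sum_ij |a_ij|^2 )   local sums of squares over the A and B
                                                  parts, MPI_SUM, then a square root
       NORM_1         : max_j sum_i |a_ij|        column sums accumulated into tmp[] by
                                                  global column, MPI_SUM, then the maximum
       NORM_INFINITY  : max_i sum_j |a_ij|        row sums over the A and B parts of each
                                                  local row, then MPI_MAX
*/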
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatTranspose_MPIAIJ"
1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328 {
1329   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1330   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1331   int          ierr,shift = Aloc->indexshift;
1332   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1333   Mat          B;
1334   PetscScalar  *array;
1335 
1336   PetscFunctionBegin;
1337   if (!matout && M != N) {
1338     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1339   }
1340 
1341   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1342 
1343   /* copy over the A part */
1344   Aloc = (Mat_SeqAIJ*)a->A->data;
1345   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1346   row = a->rstart;
1347   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1348   for (i=0; i<m; i++) {
1349     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1350     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1351   }
1352   aj = Aloc->j;
1353   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1354 
1355   /* copy over the B part */
1356   Aloc = (Mat_SeqAIJ*)a->B->data;
1357   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1358   row  = a->rstart;
1359   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1360   ct   = cols;
1361   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1362   for (i=0; i<m; i++) {
1363     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1364     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1365   }
1366   ierr = PetscFree(ct);CHKERRQ(ierr);
1367   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1368   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1369   if (matout) {
1370     *matout = B;
1371   } else {
1372     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380 {
1381   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1382   Mat        a = aij->A,b = aij->B;
1383   int        ierr,s1,s2,s3;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1387   if (rr) {
1388     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1389     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390     /* Overlap communication with computation. */
1391     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1392   }
1393   if (ll) {
1394     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1395     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1397   }
1398   /* scale the diagonal block */
1399   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1400 
1401   if (rr) {
1402     /* Do a scatter end and then right scale the off-diagonal block */
1403     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1404     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1405   }
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1413 int MatPrintHelp_MPIAIJ(Mat A)
1414 {
1415   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1416   int        ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428 {
1429   PetscFunctionBegin;
1430   *bs = 1;
1431   PetscFunctionReturn(0);
1432 }
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1435 int MatSetUnfactored_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatEqual_MPIAIJ"
1447 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1448 {
1449   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1450   Mat        a,b,c,d;
1451   PetscTruth flg;
1452   int        ierr;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1456   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatCopy_MPIAIJ"
1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471 {
1472   int        ierr;
1473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475   PetscTruth flg;
1476 
1477   PetscFunctionBegin;
1478   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1479   if (str != SAME_NONZERO_PATTERN || !flg) {
1480     /* Because of the column compression in the off-processor part of the matrix a->B,
1481        the number of columns in a->B and b->B may be different; hence we cannot call
1482        MatCopy() directly on the two parts. If need be, we could provide a copy more
1483        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1484        then copying the submatrices. */
1485     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1486   } else {
1487     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1488     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1489   }
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1495 int MatSetUpPreallocation_MPIAIJ(Mat A)
1496 {
1497   int        ierr;
1498 
1499   PetscFunctionBegin;
1500   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1505 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1506 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1507 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1508 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1509 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1510 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1511 #endif
1512 
1513 #include "petscblaslapack.h"
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatAXPY_MPIAIJ"
1517 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1518 {
1519   int        ierr,one = 1;
1520   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1521   Mat_SeqAIJ *x,*y;
1522 
1523   PetscFunctionBegin;
1524   if (str == SAME_NONZERO_PATTERN) {
1525     x  = (Mat_SeqAIJ *)xx->A->data;
1526     y  = (Mat_SeqAIJ *)yy->A->data;
1527     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1528     x  = (Mat_SeqAIJ *)xx->B->data;
1529     y  = (Mat_SeqAIJ *)yy->B->data;
1530     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1531   } else {
1532     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 /* -------------------------------------------------------------------*/
1538 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1539        MatGetRow_MPIAIJ,
1540        MatRestoreRow_MPIAIJ,
1541        MatMult_MPIAIJ,
1542        MatMultAdd_MPIAIJ,
1543        MatMultTranspose_MPIAIJ,
1544        MatMultTransposeAdd_MPIAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549        0,
1550        0,
1551        MatRelax_MPIAIJ,
1552        MatTranspose_MPIAIJ,
1553        MatGetInfo_MPIAIJ,
1554        MatEqual_MPIAIJ,
1555        MatGetDiagonal_MPIAIJ,
1556        MatDiagonalScale_MPIAIJ,
1557        MatNorm_MPIAIJ,
1558        MatAssemblyBegin_MPIAIJ,
1559        MatAssemblyEnd_MPIAIJ,
1560        0,
1561        MatSetOption_MPIAIJ,
1562        MatZeroEntries_MPIAIJ,
1563        MatZeroRows_MPIAIJ,
1564 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1565        MatLUFactorSymbolic_MPIAIJ_TFS,
1566 #else
1567        0,
1568 #endif
1569        0,
1570        0,
1571        0,
1572        MatSetUpPreallocation_MPIAIJ,
1573        0,
1574        0,
1575        0,
1576        0,
1577        MatDuplicate_MPIAIJ,
1578        0,
1579        0,
1580        0,
1581        0,
1582        MatAXPY_MPIAIJ,
1583        MatGetSubMatrices_MPIAIJ,
1584        MatIncreaseOverlap_MPIAIJ,
1585        MatGetValues_MPIAIJ,
1586        MatCopy_MPIAIJ,
1587        MatPrintHelp_MPIAIJ,
1588        MatScale_MPIAIJ,
1589        0,
1590        0,
1591        0,
1592        MatGetBlockSize_MPIAIJ,
1593        0,
1594        0,
1595        0,
1596        0,
1597        MatFDColoringCreate_MPIAIJ,
1598        0,
1599        MatSetUnfactored_MPIAIJ,
1600        0,
1601        0,
1602        MatGetSubMatrix_MPIAIJ,
1603        MatDestroy_MPIAIJ,
1604        MatView_MPIAIJ,
1605        MatGetPetscMaps_Petsc,
1606        0,
1607        0,
1608        0,
1609        0,
1610        0,
1611        0,
1612        0,
1613        0,
1614        MatSetColoring_MPIAIJ,
1615        MatSetValuesAdic_MPIAIJ,
1616        MatSetValuesAdifor_MPIAIJ
1617 };
1618 
1619 /* ----------------------------------------------------------------------------------------*/
1620 
1621 EXTERN_C_BEGIN
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1624 int MatStoreValues_MPIAIJ(Mat mat)
1625 {
1626   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1627   int        ierr;
1628 
1629   PetscFunctionBegin;
1630   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1631   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 EXTERN_C_BEGIN
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1639 int MatRetrieveValues_MPIAIJ(Mat mat)
1640 {
1641   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1642   int        ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1646   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 EXTERN_C_END
1650 
1651 #include "petscpc.h"
1652 EXTERN_C_BEGIN
1653 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1654 EXTERN_C_END
1655 
1656 EXTERN_C_BEGIN
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatCreate_MPIAIJ"
1659 int MatCreate_MPIAIJ(Mat B)
1660 {
1661   Mat_MPIAIJ *b;
1662   int        ierr,i,size;
1663 
1664   PetscFunctionBegin;
1665   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1666 
1667   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1668   B->data         = (void*)b;
1669   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1670   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1671   B->factor       = 0;
1672   B->assembled    = PETSC_FALSE;
1673   B->mapping      = 0;
1674 
1675   B->insertmode      = NOT_SET_VALUES;
1676   b->size            = size;
1677   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1678 
1679   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1680   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1681 
1682   /* the information in the maps duplicates the information computed below; eventually
1683      we should remove the duplicated information that is not contained in the maps */
1684   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1685   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1686 
1687   /* build local table of row and column ownerships */
1688   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1689   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1690   b->cowners = b->rowners + b->size + 2;
1691   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1692   b->rowners[0] = 0;
1693   for (i=2; i<=b->size; i++) {
1694     b->rowners[i] += b->rowners[i-1];
1695   }
1696   b->rstart = b->rowners[b->rank];
1697   b->rend   = b->rowners[b->rank+1];
1698   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1699   b->cowners[0] = 0;
1700   for (i=2; i<=b->size; i++) {
1701     b->cowners[i] += b->cowners[i-1];
1702   }
1703   b->cstart = b->cowners[b->rank];
1704   b->cend   = b->cowners[b->rank+1];
1705 
1706   /* build cache for off-processor entries formed during assembly */
1707   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1708   b->donotstash  = PETSC_FALSE;
1709   b->colmap      = 0;
1710   b->garray      = 0;
1711   b->roworiented = PETSC_TRUE;
1712 
1713   /* stuff used for matrix vector multiply */
1714   b->lvec      = PETSC_NULL;
1715   b->Mvctx     = PETSC_NULL;
1716 
1717   /* stuff for MatGetRow() */
1718   b->rowindices   = 0;
1719   b->rowvalues    = 0;
1720   b->getrowactive = PETSC_FALSE;
1721 
1722   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1723                                      "MatStoreValues_MPIAIJ",
1724                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1725   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1726                                      "MatRetrieveValues_MPIAIJ",
1727                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1728   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1729                                      "MatGetDiagonalBlock_MPIAIJ",
1730                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1731 
1732   PetscFunctionReturn(0);
1733 }
1734 EXTERN_C_END
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1738 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1739 {
1740   Mat        mat;
1741   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1742   int        ierr;
1743 
1744   PetscFunctionBegin;
1745   *newmat       = 0;
1746   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1747   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1748   a    = (Mat_MPIAIJ*)mat->data;
1749   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1750   mat->factor       = matin->factor;
1751   mat->assembled    = PETSC_TRUE;
1752   mat->insertmode   = NOT_SET_VALUES;
1753   mat->preallocated = PETSC_TRUE;
1754 
1755   a->rstart       = oldmat->rstart;
1756   a->rend         = oldmat->rend;
1757   a->cstart       = oldmat->cstart;
1758   a->cend         = oldmat->cend;
1759   a->size         = oldmat->size;
1760   a->rank         = oldmat->rank;
1761   a->donotstash   = oldmat->donotstash;
1762   a->roworiented  = oldmat->roworiented;
1763   a->rowindices   = 0;
1764   a->rowvalues    = 0;
1765   a->getrowactive = PETSC_FALSE;
1766 
1767   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1768   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1769   if (oldmat->colmap) {
1770 #if defined (PETSC_USE_CTABLE)
1771     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1772 #else
1773     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1774     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1775     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1776 #endif
1777   } else a->colmap = 0;
1778   if (oldmat->garray) {
1779     int len;
1780     len  = oldmat->B->n;
1781     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1782     PetscLogObjectMemory(mat,len*sizeof(int));
1783     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1784   } else a->garray = 0;
1785 
1786   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1787   PetscLogObjectParent(mat,a->lvec);
1788   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1789   PetscLogObjectParent(mat,a->Mvctx);
1790   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1791   PetscLogObjectParent(mat,a->A);
1792   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1793   PetscLogObjectParent(mat,a->B);
1794   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1795   *newmat = mat;
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #include "petscsys.h"
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatLoad_MPIAIJ"
1804 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1805 {
1806   Mat          A;
1807   PetscScalar  *vals,*svals;
1808   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1809   MPI_Status   status;
1810   int          i,nz,ierr,j,rstart,rend,fd;
1811   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1812   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1813   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1814 
1815   PetscFunctionBegin;
1816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1817   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1818   if (!rank) {
1819     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1820     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1821     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1822     if (header[3] < 0) {
1823       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1824     }
1825   }
1826 
1827   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1828   M = header[1]; N = header[2];
1829   /* determine ownership of all rows */
1830   m = M/size + ((M % size) > rank);
1831   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1832   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1833   rowners[0] = 0;
1834   for (i=2; i<=size; i++) {
1835     rowners[i] += rowners[i-1];
1836   }
1837   rstart = rowners[rank];
1838   rend   = rowners[rank+1];
1839 
1840   /* distribute row lengths to all processors */
1841   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1842   offlens = ourlens + (rend-rstart);
1843   if (!rank) {
1844     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1845     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1846     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1847     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1848     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1849     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1850   } else {
1851     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1852   }
1853 
1854   if (!rank) {
1855     /* calculate the number of nonzeros on each processor */
1856     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1857     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1858     for (i=0; i<size; i++) {
1859       for (j=rowners[i]; j< rowners[i+1]; j++) {
1860         procsnz[i] += rowlengths[j];
1861       }
1862     }
1863     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1864 
1865     /* determine max buffer needed and allocate it */
1866     maxnz = 0;
1867     for (i=0; i<size; i++) {
1868       maxnz = PetscMax(maxnz,procsnz[i]);
1869     }
1870     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1871 
1872     /* read in my part of the matrix column indices  */
1873     nz   = procsnz[0];
1874     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1875     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1876 
1877     /* read in everyone else's and ship them off */
1878     for (i=1; i<size; i++) {
1879       nz   = procsnz[i];
1880       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1881       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1882     }
1883     ierr = PetscFree(cols);CHKERRQ(ierr);
1884   } else {
1885     /* determine buffer space needed for message */
1886     nz = 0;
1887     for (i=0; i<m; i++) {
1888       nz += ourlens[i];
1889     }
1890     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1891 
1892     /* receive message of column indices*/
1893     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1894     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1895     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1896   }
1897 
1898   /* determine column ownership if matrix is not square */
1899   if (N != M) {
1900     n      = N/size + ((N % size) > rank);
1901     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1902     cstart = cend - n;
1903   } else {
1904     cstart = rstart;
1905     cend   = rend;
1906     n      = cend - cstart;
1907   }
1908 
1909   /* loop over local rows, determining number of off diagonal entries */
1910   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1911   jj = 0;
1912   for (i=0; i<m; i++) {
1913     for (j=0; j<ourlens[i]; j++) {
1914       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1915       jj++;
1916     }
1917   }
1918 
1919   /* create our matrix */
1920   for (i=0; i<m; i++) {
1921     ourlens[i] -= offlens[i];
1922   }
1923   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1924   A = *newmat;
1925   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1926   for (i=0; i<m; i++) {
1927     ourlens[i] += offlens[i];
1928   }
1929 
1930   if (!rank) {
1931     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1932 
1933     /* read in my part of the matrix numerical values  */
1934     nz   = procsnz[0];
1935     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1936 
1937     /* insert into matrix */
1938     jj      = rstart;
1939     smycols = mycols;
1940     svals   = vals;
1941     for (i=0; i<m; i++) {
1942       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1943       smycols += ourlens[i];
1944       svals   += ourlens[i];
1945       jj++;
1946     }
1947 
1948     /* read in other processors and ship out */
1949     for (i=1; i<size; i++) {
1950       nz   = procsnz[i];
1951       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1952       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1953     }
1954     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1955   } else {
1956     /* receive numeric values */
1957     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1958 
1959     /* receive message of values*/
1960     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1961     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1962     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1963 
1964     /* insert into matrix */
1965     jj      = rstart;
1966     smycols = mycols;
1967     svals   = vals;
1968     for (i=0; i<m; i++) {
1969       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1970       smycols += ourlens[i];
1971       svals   += ourlens[i];
1972       jj++;
1973     }
1974   }
1975   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1976   ierr = PetscFree(vals);CHKERRQ(ierr);
1977   ierr = PetscFree(mycols);CHKERRQ(ierr);
1978   ierr = PetscFree(rowners);CHKERRQ(ierr);
1979 
1980   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1981   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   PetscFunctionReturn(0);
1983 }
1984 EXTERN_C_END
1985 
1986 #undef __FUNCT__
1987 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1988 /*
1989     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1990   matrix on each process, and then the end result by concatenating the local matrices.
1991   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1992 */
1993 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1994 {
1995   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1996   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1997   Mat          *local,M,Mreuse;
1998   PetscScalar  *vwork,*aa;
1999   MPI_Comm     comm = mat->comm;
2000   Mat_SeqAIJ   *aij;
2001 
2002 
2003   PetscFunctionBegin;
2004   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2005   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2006 
2007   if (call ==  MAT_REUSE_MATRIX) {
2008     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2009     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2010     local = &Mreuse;
2011     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2012   } else {
2013     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2014     Mreuse = *local;
2015     ierr   = PetscFree(local);CHKERRQ(ierr);
2016   }
2017 
2018   /*
2019       m - number of local rows
2020       n - number of columns (same on all processors)
2021       rstart - first row in new global matrix generated
2022   */
2023   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2024   if (call == MAT_INITIAL_MATRIX) {
2025     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2026     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2027     ii  = aij->i;
2028     jj  = aij->j;
2029 
2030     /*
2031         Determine the number of non-zeros in the diagonal and off-diagonal
2032         portions of the matrix in order to do correct preallocation
2033     */
2034 
2035     /* first get start and end of "diagonal" columns */
2036     if (csize == PETSC_DECIDE) {
2037       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2038       if (mglobal == n) { /* square matrix */
2039         nlocal = m;
2040       } else {
2041         nlocal = n/size + ((n % size) > rank);
2042       }
2043     } else {
2044       nlocal = csize;
2045     }
2046     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2047     rstart = rend - nlocal;
2048     if (rank == size - 1 && rend != n) {
2049       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2050     }
2051 
2052     /* next, compute all the lengths */
2053     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2054     olens = dlens + m;
2055     for (i=0; i<m; i++) {
2056       jend = ii[i+1] - ii[i];
2057       olen = 0;
2058       dlen = 0;
2059       for (j=0; j<jend; j++) {
2060         if (*jj < rstart || *jj >= rend) olen++;
2061         else dlen++;
2062         jj++;
2063       }
2064       olens[i] = olen;
2065       dlens[i] = dlen;
2066     }
2067     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2068     ierr = PetscFree(dlens);CHKERRQ(ierr);
2069   } else {
2070     int ml,nl;
2071 
2072     M = *newmat;
2073     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2074     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2075     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2076     /*
2077          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2078        rather than the slower MatSetValues().
2079     */
2080     M->was_assembled = PETSC_TRUE;
2081     M->assembled     = PETSC_FALSE;
2082   }
2083   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2084   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2085   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2086   ii  = aij->i;
2087   jj  = aij->j;
2088   aa  = aij->a;
2089   for (i=0; i<m; i++) {
2090     row   = rstart + i;
2091     nz    = ii[i+1] - ii[i];
2092     cwork = jj;     jj += nz;
2093     vwork = aa;     aa += nz;
2094     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2095   }
2096 
2097   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2098   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2099   *newmat = M;
2100 
2101   /* save the submatrix used on this processor for the next request */
2102   if (call ==  MAT_INITIAL_MATRIX) {
2103     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2104     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2105   }
2106 
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2112 /*@C
2113    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2114    (the default parallel PETSc format).  For good matrix assembly performance
2115    the user should preallocate the matrix storage by setting the parameters
2116    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2117    performance can be increased by more than a factor of 50.
2118 
2119    Collective on MPI_Comm
2120 
2121    Input Parameters:
2122 +  A - the matrix
2123 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2124            (same value is used for all local rows)
2125 .  d_nnz - array containing the number of nonzeros in the various rows of the
2126            DIAGONAL portion of the local submatrix (possibly different for each row)
2127            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2128            The size of this array is equal to the number of local rows, i.e., 'm'.
2129            You must leave room for the diagonal entry even if it is zero.
2130 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2131            submatrix (same value is used for all local rows).
2132 -  o_nnz - array containing the number of nonzeros in the various rows of the
2133            OFF-DIAGONAL portion of the local submatrix (possibly different for
2134            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2135            structure. The size of this array is equal to the number
2136            of local rows, i.e., 'm'.
2137 
2138    The AIJ format (also called the Yale sparse matrix format or
2139    compressed row storage) is fully compatible with standard Fortran 77
2140    storage.  That is, the stored row and column indices can begin at
2141    either one (as in Fortran) or zero.  See the users manual for details.
2142 
2143    The user MUST specify either the local or global matrix dimensions
2144    (possibly both).
2145 
2146    The parallel matrix is partitioned such that the first m0 rows belong to
2147    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2148    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2149 
2150    The DIAGONAL portion of the local submatrix of a processor can be defined
2151    as the submatrix obtained by extracting the part corresponding
2152    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2153    first row that belongs to the processor and r2 is the last row belonging
2154    to this processor. This is a square mxm matrix. The remaining portion
2155    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2156 
2157    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2158 
2159    By default, this format uses inodes (identical nodes) when possible.
2160    We search for consecutive rows with the same nonzero structure, thereby
2161    reusing matrix information to achieve increased efficiency.
2162 
2163    Options Database Keys:
2164 +  -mat_aij_no_inode  - Do not use inodes
2165 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2166 -  -mat_aij_oneindex - Internally use indexing starting at 1
2167         rather than 0.  Note that when calling MatSetValues(),
2168         the user still MUST index entries starting at 0!
2169 
2170    Example usage:
2171 
2172    Consider the following 8x8 matrix with 34 non-zero values that is
2173    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2174    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2175    as follows:
2176 
2177 .vb
2178             1  2  0  |  0  3  0  |  0  4
2179     Proc0   0  5  6  |  7  0  0  |  8  0
2180             9  0 10  | 11  0  0  | 12  0
2181     -------------------------------------
2182            13  0 14  | 15 16 17  |  0  0
2183     Proc1   0 18  0  | 19 20 21  |  0  0
2184             0  0  0  | 22 23  0  | 24  0
2185     -------------------------------------
2186     Proc2  25 26 27  |  0  0 28  | 29  0
2187            30  0  0  | 31 32 33  |  0 34
2188 .ve
2189 
2190    This can be represented as a collection of submatrices as:
2191 
2192 .vb
2193       A B C
2194       D E F
2195       G H I
2196 .ve
2197 
2198    Where the submatrices A,B,C are owned by proc0, D,E,F are
2199    owned by proc1, G,H,I are owned by proc2.
2200 
2201    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2202    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2203    The 'M','N' parameters are 8,8, and have the same values on all procs.
2204 
2205    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2206    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2207    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2208    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2209    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2210    matrix, and [DF] as another SeqAIJ matrix.
2211 
2212    When d_nz, o_nz parameters are specified, d_nz storage elements are
2213    allocated for every row of the local diagonal submatrix, and o_nz
2214    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2215    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2216    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2217    In this case, the values of d_nz,o_nz are:
2218 .vb
2219      proc0 : d_nz = 2, o_nz = 2
2220      proc1 : d_nz = 3, o_nz = 2
2221      proc2 : d_nz = 1, o_nz = 4
2222 .ve
2223    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2224    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2225    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2226    34 values.
2227 
2228    When d_nnz, o_nnz parameters are specified, the storage is specified
2229    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2230    In the above case the values for d_nnz,o_nnz are:
2231 .vb
2232      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2233      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2234      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2235 .ve
2236    Here the space allocated is the sum of all the above values, i.e., 34, and
2237    hence the preallocation is exact.
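
   A minimal call sequence on one process of the example above (run on 3 processes)
   might look as follows; this is a sketch only, with the sizes and the d_nnz/o_nnz
   values taken from proc0 of the example and the CHKERRQ() error checks omitted:
.vb
     Mat A;
     int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);
     ierr = MatSetType(A,MATMPIAIJ);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... then MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve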
2238 
2239    Level: intermediate
2240 
2241 .keywords: matrix, aij, compressed row, sparse, parallel
2242 
2243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2244 @*/
2245 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2246 {
2247   Mat_MPIAIJ   *b;
2248   int          ierr,i;
2249   PetscTruth   flg2;
2250 
2251   PetscFunctionBegin;
2252   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2253   if (!flg2) PetscFunctionReturn(0);
2254   B->preallocated = PETSC_TRUE;
2255   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2256   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2257   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2258   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2259   if (d_nnz) {
2260     for (i=0; i<B->m; i++) {
2261       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2262     }
2263   }
2264   if (o_nnz) {
2265     for (i=0; i<B->m; i++) {
2266       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2267     }
2268   }
2269   b = (Mat_MPIAIJ*)B->data;
2270 
2271   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2272   PetscLogObjectParent(B,b->A);
2273   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2274   PetscLogObjectParent(B,b->B);
2275 
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatCreateMPIAIJ"
2281 /*@C
2282    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2283    (the default parallel PETSc format).  For good matrix assembly performance
2284    the user should preallocate the matrix storage by setting the parameters
2285    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2286    performance can be increased by more than a factor of 50.
2287 
2288    Collective on MPI_Comm
2289 
2290    Input Parameters:
2291 +  comm - MPI communicator
2292 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2293            This value should be the same as the local size used in creating the
2294            y vector for the matrix-vector product y = Ax.
2295 .  n - This value should be the same as the local size used in creating the
2296        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2297        calculated if N is given) For square matrices n is almost always m.
2298 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2299 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2300 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2301            (same value is used for all local rows)
2302 .  d_nnz - array containing the number of nonzeros in the various rows of the
2303            DIAGONAL portion of the local submatrix (possibly different for each row)
2304            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2305            The size of this array is equal to the number of local rows, i.e., 'm'.
2306            You must leave room for the diagonal entry even if it is zero.
2307 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2308            submatrix (same value is used for all local rows).
2309 -  o_nnz - array containing the number of nonzeros in the various rows of the
2310            OFF-DIAGONAL portion of the local submatrix (possibly different for
2311            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2312            structure. The size of this array is equal to the number
2313            of local rows, i.e., 'm'.
2314 
2315    Output Parameter:
2316 .  A - the matrix
2317 
2318    Notes:
2319    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2320    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2321    storage requirements for this matrix.
2322 
2323    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2324    processor then it must be used on all processors that share the object for
2325    that argument.
2326 
2327    The AIJ format (also called the Yale sparse matrix format or
2328    compressed row storage) is fully compatible with standard Fortran 77
2329    storage.  That is, the stored row and column indices can begin at
2330    either one (as in Fortran) or zero.  See the users manual for details.
2331 
2332    The user MUST specify either the local or global matrix dimensions
2333    (possibly both).
2334 
2335    The parallel matrix is partitioned such that the first m0 rows belong to
2336    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2337    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2338 
2339    The DIAGONAL portion of the local submatrix of a processor can be defined
2340    as the submatrix obtained by extracting the part corresponding
2341    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2342    first row that belongs to the processor and r2 is the last row belonging
2343    to this processor. This is a square mxm matrix. The remaining portion
2344    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2345 
2346    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2347 
2348    By default, this format uses inodes (identical nodes) when possible.
2349    We search for consecutive rows with the same nonzero structure, thereby
2350    reusing matrix information to achieve increased efficiency.
2351 
2352    Options Database Keys:
2353 +  -mat_aij_no_inode  - Do not use inodes
2354 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2355 -  -mat_aij_oneindex - Internally use indexing starting at 1
2356         rather than 0.  Note that when calling MatSetValues(),
2357         the user still MUST index entries starting at 0!
2358 
2359 
2360    Example usage:
2361 
2362    Consider the following 8x8 matrix with 34 non-zero values that is
2363    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2364    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2365    as follows:
2366 
2367 .vb
2368             1  2  0  |  0  3  0  |  0  4
2369     Proc0   0  5  6  |  7  0  0  |  8  0
2370             9  0 10  | 11  0  0  | 12  0
2371     -------------------------------------
2372            13  0 14  | 15 16 17  |  0  0
2373     Proc1   0 18  0  | 19 20 21  |  0  0
2374             0  0  0  | 22 23  0  | 24  0
2375     -------------------------------------
2376     Proc2  25 26 27  |  0  0 28  | 29  0
2377            30  0  0  | 31 32 33  |  0 34
2378 .ve
2379 
2380    This can be represented as a collection of submatrices as:
2381 
2382 .vb
2383       A B C
2384       D E F
2385       G H I
2386 .ve
2387 
2388    Where the submatrices A,B,C are owned by proc0, D,E,F are
2389    owned by proc1, G,H,I are owned by proc2.
2390 
2391    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2392    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2393    The 'M','N' parameters are 8,8, and have the same values on all procs.
2394 
2395    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2396    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2397    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2398    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2399    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2400    matrix, and [DF] as another SeqAIJ matrix.
2401 
2402    When d_nz, o_nz parameters are specified, d_nz storage elements are
2403    allocated for every row of the local diagonal submatrix, and o_nz
2404    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2405    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2406    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2407    In this case, the values of d_nz,o_nz are:
2408 .vb
2409      proc0 : d_nz = 2, o_nz = 2
2410      proc1 : d_nz = 3, o_nz = 2
2411      proc2 : d_nz = 1, o_nz = 4
2412 .ve
2413    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2414    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2415    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2416    34 values.
2417 
2418    When d_nnz, o_nnz parameters are specified, the storage is specified
2419    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2420    In the above case the values for d_nnz,o_nnz are:
2421 .vb
2422      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2423      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2424      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2425 .ve
2426    Here the space allocated is the sum of all the above values, i.e., 34, and
2427    hence the preallocation is exact.
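
   For instance, proc0 of the layout above might create its part of the matrix as
   follows; this is a sketch only, with the sizes and the d_nnz/o_nnz values taken
   from the example and the CHKERRQ() error checks omitted:
.vb
     Mat A;
     int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... then MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve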
2428 
2429    Level: intermediate
2430 
2431 .keywords: matrix, aij, compressed row, sparse, parallel
2432 
2433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2434 @*/
2435 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2436 {
2437   int ierr,size;
2438 
2439   PetscFunctionBegin;
2440   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2441   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2442   if (size > 1) {
2443     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2444     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2445   } else {
2446     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2447     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2448   }
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2454 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2455 {
2456   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2457   PetscFunctionBegin;
2458   *Ad     = a->A;
2459   *Ao     = a->B;
2460   *colmap = a->garray;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2466 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2467 {
2468   int        ierr,i;
2469   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2470 
2471   PetscFunctionBegin;
2472   if (coloring->ctype == IS_COLORING_LOCAL) {
2473     ISColoringValue *allcolors,*colors;
2474     ISColoring      ocoloring;
2475 
2476     /* set coloring for diagonal portion */
2477     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2478 
2479     /* set coloring for off-diagonal portion */
2480     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2481     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2482     for (i=0; i<a->B->n; i++) {
2483       colors[i] = allcolors[a->garray[i]];
2484     }
2485     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2486     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2487     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2488     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2489   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2490     ISColoringValue *colors;
2491     int             *larray;
2492     ISColoring      ocoloring;
2493 
2494     /* set coloring for diagonal portion */
2495     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2496     for (i=0; i<a->A->n; i++) {
2497       larray[i] = i + a->cstart;
2498     }
2499     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2500     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2501     for (i=0; i<a->A->n; i++) {
2502       colors[i] = coloring->colors[larray[i]];
2503     }
2504     ierr = PetscFree(larray);CHKERRQ(ierr);
2505     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2506     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2507     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2508 
2509     /* set coloring for off-diagonal portion */
2510     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2511     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2512     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2513     for (i=0; i<a->B->n; i++) {
2514       colors[i] = coloring->colors[larray[i]];
2515     }
2516     ierr = PetscFree(larray);CHKERRQ(ierr);
2517     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2518     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2519     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2520   } else {
2521     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2522   }
2523 
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2529 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2530 {
2531   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2532   int        ierr;
2533 
2534   PetscFunctionBegin;
2535   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2536   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2542 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2543 {
2544   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2545   int        ierr;
2546 
2547   PetscFunctionBegin;
2548   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2549   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatMerge"
2555 /*@C
2556       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2557                  matrices from each processor
2558 
2559     Collective on MPI_Comm
2560 
2561    Input Parameters:
2562 +    comm - the communicator the parallel matrix will live on
2563 -    inmat - the input sequential matrix on this processor
2564 
2565    Output Parameter:
2566 .    outmat - the parallel matrix generated
2567 
2568     Level: advanced
2569 
2570    Notes: The number of columns of the sequential matrix on EACH of the separate
2571       processors MUST be the same.
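
   A minimal usage sketch is given below; mlocal (local rows) and N (global columns)
   are placeholder names, the CHKERRQ() checks are omitted, and note that the input
   matrix is destroyed by the call:
.vb
     Mat Aseq,Apar;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&Aseq);
     /* ... fill Aseq with MatSetValues() and assemble it ... */
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,&Apar);  /* Aseq is destroyed here */
.ve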
2572 
2573 @*/
2574 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2575 {
2576   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2577   PetscScalar *values;
2578   PetscMap    columnmap,rowmap;
2579 
2580   PetscFunctionBegin;
2581 
2582   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2583 
2584   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2585   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2586   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2587   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2588   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2589   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2590 
2591   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2592   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2593   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2594   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2595   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2596 
2597   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2598   for (i=0;i<m;i++) {
2599     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2600     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2601     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2602   }
2603   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2604   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2605 
2606   for (i=0;i<m;i++) {
2607     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2608     I    = i + rstart;
2609     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2610     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2611   }
2612   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2613   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2614   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2615 
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "MatFileSplit"
2621 int MatFileSplit(Mat A,char *outfile)
2622 {
2623   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2624   PetscViewer out;
2625   char        *name;
2626   Mat         B;
2627   PetscScalar *values;
2628 
2629   PetscFunctionBegin;
2630 
2631   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2632   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2633   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2634   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2635   for (i=0;i<m;i++) {
2636     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2637     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2638     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2639   }
2640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2642 
2643   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2644   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2645   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2646   sprintf(name,"%s.%d",outfile,rank);
2647   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2648   ierr = PetscFree(name);CHKERRQ(ierr);
2649   ierr = MatView(B,out);CHKERRQ(ierr);
2650   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2651   ierr = MatDestroy(B);CHKERRQ(ierr);
2652   PetscFunctionReturn(0);
2653 }
2654