xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 32a4b47a72620a1b77aa3c4e8ddc8668bd2d097b)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 EXTERN int MatUseSpooles_MPIAIJ(Mat);
15 
16 /*
17   Local utility routine that creates a mapping from the global column
18 number to the local number in the off-diagonal part of the local
19 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
20 a slightly higher hash-table lookup cost; without it, it is not scalable (each
21 processor holds an order-N integer array) but access is fast.
22 */
23 #undef __FUNCT__
24 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
25 int CreateColmap_MPIAIJ_Private(Mat mat)
26 {
27   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
28   int        n = aij->B->n,i,ierr;
29 
30   PetscFunctionBegin;
31 #if defined (PETSC_USE_CTABLE)
32   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
33   for (i=0; i<n; i++){
34     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
35   }
36 #else
37   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
38   PetscLogObjectMemory(mat,mat->N*sizeof(int));
39   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
40   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
41 #endif
42   PetscFunctionReturn(0);
43 }
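/*
   Lookup sketch (illustrative only): with the colmap in place, a global column
   number gcol known to belong to the off-diagonal part maps to its local
   column in aij->B.  Both storage variants keep the indices offset by one, so
   a value of -1 after the decrement means the column is not present.

     int lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/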
44 
45 #define CHUNKSIZE   15
46 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
47 { \
48  \
49     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
50     rmax = aimax[row]; nrow = ailen[row];  \
51     col1 = col - shift; \
52      \
53     low = 0; high = nrow; \
54     while (high-low > 5) { \
55       t = (low+high)/2; \
56       if (rp[t] > col1) high = t; \
57       else             low  = t; \
58     } \
59       for (_i=low; _i<high; _i++) { \
60         if (rp[_i] > col1) break; \
61         if (rp[_i] == col1) { \
62           if (addv == ADD_VALUES) ap[_i] += value;   \
63           else                  ap[_i] = value; \
64           goto a_noinsert; \
65         } \
66       }  \
67       if (nonew == 1) goto a_noinsert; \
68       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
69       if (nrow >= rmax) { \
70         /* there is no extra room in row, therefore enlarge */ \
71         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
72         PetscScalar *new_a; \
73  \
74         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
75  \
76         /* malloc new storage space */ \
77         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
78         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
79         new_j   = (int*)(new_a + new_nz); \
80         new_i   = new_j + new_nz; \
81  \
82         /* copy over old data into new slots */ \
83         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
84         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
85         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
86         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
87         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
88                                                            len*sizeof(int));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
90         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
91                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
92         /* free up old matrix storage */ \
93  \
94         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
95         if (!a->singlemalloc) { \
96            ierr = PetscFree(a->i);CHKERRQ(ierr); \
97            ierr = PetscFree(a->j);CHKERRQ(ierr); \
98         } \
99         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
100         a->singlemalloc = PETSC_TRUE; \
101  \
102         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
103         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
104         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
105         a->maxnz += CHUNKSIZE; \
106         a->reallocs++; \
107       } \
108       N = nrow++ - 1; a->nz++; \
109       /* shift up all the later entries in this row */ \
110       for (ii=N; ii>=_i; ii--) { \
111         rp[ii+1] = rp[ii]; \
112         ap[ii+1] = ap[ii]; \
113       } \
114       rp[_i] = col1;  \
115       ap[_i] = value;  \
116       a_noinsert: ; \
117       ailen[row] = nrow; \
118 }
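/*
   Storage layout produced by the reallocation branch above (one malloc):

     new_a : new_nz PetscScalar values
     new_j : new_nz column indices, starting at (int*)(new_a + new_nz)
     new_i : am+1 row pointers, starting at new_j + new_nz

   Row starts up to and including "row" keep their old values; every later row
   start is shifted by CHUNKSIZE, opening CHUNKSIZE free slots at the end of
   the row currently being inserted into.
*/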
119 
120 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
121 { \
122  \
123     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
124     rmax = bimax[row]; nrow = bilen[row];  \
125     col1 = col - shift; \
126      \
127     low = 0; high = nrow; \
128     while (high-low > 5) { \
129       t = (low+high)/2; \
130       if (rp[t] > col1) high = t; \
131       else             low  = t; \
132     } \
133        for (_i=low; _i<high; _i++) { \
134         if (rp[_i] > col1) break; \
135         if (rp[_i] == col1) { \
136           if (addv == ADD_VALUES) ap[_i] += value;   \
137           else                  ap[_i] = value; \
138           goto b_noinsert; \
139         } \
140       }  \
141       if (nonew == 1) goto b_noinsert; \
142       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
143       if (nrow >= rmax) { \
144         /* there is no extra room in row, therefore enlarge */ \
145         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
146         PetscScalar *new_a; \
147  \
148         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
149  \
150         /* malloc new storage space */ \
151         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
152         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
153         new_j   = (int*)(new_a + new_nz); \
154         new_i   = new_j + new_nz; \
155  \
156         /* copy over old data into new slots */ \
157         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
158         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
159         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
160         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
161         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
162                                                            len*sizeof(int));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
164         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
165                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
166         /* free up old matrix storage */ \
167  \
168         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
169         if (!b->singlemalloc) { \
170           ierr = PetscFree(b->i);CHKERRQ(ierr); \
171           ierr = PetscFree(b->j);CHKERRQ(ierr); \
172         } \
173         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
174         b->singlemalloc = PETSC_TRUE; \
175  \
176         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
177         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
178         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
179         b->maxnz += CHUNKSIZE; \
180         b->reallocs++; \
181       } \
182       N = nrow++ - 1; b->nz++; \
183       /* shift up all the later entries in this row */ \
184       for (ii=N; ii>=_i; ii--) { \
185         rp[ii+1] = rp[ii]; \
186         ap[ii+1] = ap[ii]; \
187       } \
188       rp[_i] = col1;  \
189       ap[_i] = value;  \
190       b_noinsert: ; \
191       bilen[row] = nrow; \
192 }
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "MatSetValues_MPIAIJ"
196 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
197 {
198   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
199   PetscScalar  value;
200   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
201   int          cstart = aij->cstart,cend = aij->cend,row,col;
202   PetscTruth   roworiented = aij->roworiented;
203 
204   /* Some Variables required in the macro */
205   Mat          A = aij->A;
206   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
207   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
208   PetscScalar  *aa = a->a;
209   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
210   Mat          B = aij->B;
211   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
212   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
213   PetscScalar  *ba = b->a;
214 
215   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
216   int          nonew = a->nonew,shift = a->indexshift;
217   PetscScalar  *ap;
218 
219   PetscFunctionBegin;
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_BOPT_g)
223     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
224 #endif
225     if (im[i] >= rstart && im[i] < rend) {
226       row = im[i] - rstart;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart && in[j] < cend){
229           col = in[j] - cstart;
230           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231           if (ignorezeroentries && value == 0.0) continue;
232           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         } else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_BOPT_g)
236         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!aij->colmap) {
241               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined (PETSC_USE_CTABLE)
244             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
245 	    col--;
246 #else
247             col = aij->colmap[in[j]] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
250               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
251               col =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
253               B = aij->B;
254               b = (Mat_SeqAIJ*)B->data;
255               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
256               ba = b->a;
257             }
258           } else col = in[j];
259           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
260           if (ignorezeroentries && value == 0.0) continue;
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           if (ignorezeroentries && v[i*n] == 0.0) continue;
269           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
270         } else {
271           if (ignorezeroentries && v[i] == 0.0) continue;
272           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
273         }
274       }
275     }
276   }
277   PetscFunctionReturn(0);
278 }
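/*
   Caller-side sketch (illustrative, global indices, matrix assumed created
   with MatCreateMPIAIJ): entries in a locally owned row whose column lies in
   [cstart,cend) go into the diagonal block aij->A; other columns of a locally
   owned row go into the off-diagonal block aij->B in compressed local
   numbering; rows owned by another process are stashed and shipped at
   assembly time.

     int         row = 0,col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/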
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatGetValues_MPIAIJ"
282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
283 {
284   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
285   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
286   int        cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
291     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
296         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col --;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
328   int         ierr,nstash,reallocs;
329   InsertMode  addv;
330 
331   PetscFunctionBegin;
332   if (aij->donotstash) {
333     PetscFunctionReturn(0);
334   }
335 
336   /* make sure all processors are either in INSERTMODE or ADDMODE */
337   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
338   if (addv == (ADD_VALUES|INSERT_VALUES)) {
339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
340   }
341   mat->insertmode = addv; /* in case this processor had no cache */
342 
343   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
344   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
345   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
346   PetscFunctionReturn(0);
347 }
348 
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
353 {
354   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
355   int         i,j,rstart,ncols,n,ierr,flg;
356   int         *row,*col,other_disassembled;
357   PetscScalar *val;
358   InsertMode  addv = mat->insertmode;
359 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
360   PetscTruth  flag;
361 #endif
362 
363   PetscFunctionBegin;
364   if (!aij->donotstash) {
365     while (1) {
366       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
367       if (!flg) break;
368 
369       for (i=0; i<n;) {
370         /* Now identify the consecutive vals belonging to the same row */
371         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
372         if (j < n) ncols = j-i;
373         else       ncols = n-i;
374         /* Now assemble all these values with a single function call */
375         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
376         i = j;
377       }
378     }
379     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
380   }
381 
382   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
383   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
384 
385   /* determine if any processor has disassembled; if so we must
386      also disassemble ourselves so that we can reassemble. */
387   /*
388      if nonzero structure of submatrix B cannot change then we know that
389      no processor disassembled thus we can skip this stuff
390   */
391   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
392     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
393     if (mat->was_assembled && !other_disassembled) {
394       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
395     }
396   }
397 
398   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
399     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
400   }
401   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
402   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
403 
404   if (aij->rowvalues) {
405     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
406     aij->rowvalues = 0;
407   }
408 #if defined(PETSC_HAVE_SUPERLUDIST)
409   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
410   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
411 #endif
412 
413 #if defined(PETSC_HAVE_SPOOLES)
414   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
415   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
416 #endif
417   PetscFunctionReturn(0);
418 }
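/*
   Assembly recap: MatAssemblyBegin() verifies that every process used a
   consistent InsertMode and starts scattering the stash of off-process
   entries; MatAssemblyEnd() drains those messages through
   MatSetValues_MPIAIJ(), assembles the diagonal block, matches (via an
   Allreduce) any disassembly done on another process, builds the column map
   and scatter on the first final assembly, and then assembles the
   off-diagonal block.
*/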
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
422 int MatZeroEntries_MPIAIJ(Mat A)
423 {
424   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
425   int        ierr;
426 
427   PetscFunctionBegin;
428   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
429   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatZeroRows_MPIAIJ"
435 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
436 {
437   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
438   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
439   int            *procs,*nprocs,j,idx,nsends,*work,row;
440   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
441   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
442   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
443   MPI_Comm       comm = A->comm;
444   MPI_Request    *send_waits,*recv_waits;
445   MPI_Status     recv_status,*send_status;
446   IS             istmp;
447   PetscTruth     found;
448 
449   PetscFunctionBegin;
450   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
451   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
452 
453   /*  first count number of contributors to each processor */
454   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
455   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
456   procs  = nprocs + size;
457   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
458   for (i=0; i<N; i++) {
459     idx = rows[i];
460     found = PETSC_FALSE;
461     for (j=0; j<size; j++) {
462       if (idx >= owners[j] && idx < owners[j+1]) {
463         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
464       }
465     }
466     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
467   }
468   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
469 
470   /* inform other processors of number of messages and max length*/
471   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
472   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
473   nrecvs = work[size+rank];
474   nmax   = work[rank];
475   ierr   = PetscFree(work);CHKERRQ(ierr);
476 
477   /* post receives:   */
478   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
479   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
480   for (i=0; i<nrecvs; i++) {
481     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
482   }
483 
484   /* do sends:
485       1) starts[i] gives the starting index in svalues for stuff going to
486          the ith processor
487   */
488   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
489   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
490   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
491   starts[0] = 0;
492   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
493   for (i=0; i<N; i++) {
494     svalues[starts[owner[i]]++] = rows[i];
495   }
496   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
497 
498   starts[0] = 0;
499   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
500   count = 0;
501   for (i=0; i<size; i++) {
502     if (procs[i]) {
503       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
504     }
505   }
506   ierr = PetscFree(starts);CHKERRQ(ierr);
507 
508   base = owners[rank];
509 
510   /*  wait on receives */
511   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
512   source = lens + nrecvs;
513   count  = nrecvs; slen = 0;
514   while (count) {
515     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
516     /* unpack receives into our local space */
517     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
518     source[imdex]  = recv_status.MPI_SOURCE;
519     lens[imdex]    = n;
520     slen          += n;
521     count--;
522   }
523   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
524 
525   /* move the data into the send scatter */
526   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
527   count = 0;
528   for (i=0; i<nrecvs; i++) {
529     values = rvalues + i*nmax;
530     for (j=0; j<lens[i]; j++) {
531       lrows[count++] = values[j] - base;
532     }
533   }
534   ierr = PetscFree(rvalues);CHKERRQ(ierr);
535   ierr = PetscFree(lens);CHKERRQ(ierr);
536   ierr = PetscFree(owner);CHKERRQ(ierr);
537   ierr = PetscFree(nprocs);CHKERRQ(ierr);
538 
539   /* actually zap the local rows */
540   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
541   PetscLogObjectParent(A,istmp);
542 
543   /*
544         Zero the required rows. If the "diagonal block" of the matrix
545      is square and the user wishes to set the diagonal we use separate
546      code so that MatSetValues() is not called for each diagonal entry,
547      which would allocate new memory and slow things down with many mallocs.
548 
549        Contributed by: Mathew Knepley
550   */
551   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
552   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
553   if (diag && (l->A->M == l->A->N)) {
554     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
555   } else if (diag) {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
558       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
559 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
560     }
561     for (i = 0; i < slen; i++) {
562       row  = lrows[i] + rstart;
563       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
564     }
565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
567   } else {
568     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
569   }
570   ierr = ISDestroy(istmp);CHKERRQ(ierr);
571   ierr = PetscFree(lrows);CHKERRQ(ierr);
572 
573   /* wait on sends */
574   if (nsends) {
575     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
576     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
577     ierr = PetscFree(send_status);CHKERRQ(ierr);
578   }
579   ierr = PetscFree(send_waits);CHKERRQ(ierr);
580   ierr = PetscFree(svalues);CHKERRQ(ierr);
581 
582   PetscFunctionReturn(0);
583 }
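/*
   Caller-side sketch (illustrative): each process may list any global rows,
   not only rows it owns; the routine above routes every index to its owning
   process before zeroing the rows and optionally setting the diagonal.

     IS          is;
     int         rows[3] = {0,5,7};
     PetscScalar diag    = 2.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,3,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/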
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatMult_MPIAIJ"
587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
588 {
589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
590   int        ierr,nt;
591 
592   PetscFunctionBegin;
593   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
594   if (nt != A->n) {
595     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
596   }
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
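/*
   The multiply overlaps communication with computation:

     yy = A_d*xx + B_o*lvec

   where A_d (a->A) is the diagonal block, B_o (a->B) is the off-diagonal
   block stored with compressed local column numbering, and a->lvec gathers
   the off-process entries of xx that B_o needs.  The forward scatter is
   started, the purely local product is computed, the scatter is completed,
   and the off-diagonal contribution is added last.
*/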
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultAdd_MPIAIJ"
606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   int        ierr;
610 
611   PetscFunctionBegin;
612   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
613   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
614   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
615   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
622 {
623   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
624   int        ierr;
625 
626   PetscFunctionBegin;
627   /* do nondiagonal part */
628   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
629   /* send it on its way */
630   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   /* do local part */
632   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
633   /* receive remote parts: note this assumes the values are not actually */
634   /* inserted in yy until the next line, which is true for my implementation */
635   /* but is perhaps not always true. */
636   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637   PetscFunctionReturn(0);
638 }
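/*
   The transpose multiply runs the scatter in the reverse direction: the
   off-diagonal product B_o^T*xx is formed first into a->lvec so that the
   reverse scatter (with ADD_VALUES) can be in flight while the local product
   A_d^T*xx is computed into yy; the scatter end then accumulates the remote
   contributions into yy.
*/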
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
645   int        ierr;
646 
647   PetscFunctionBegin;
648   /* do nondiagonal part */
649   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
650   /* send it on its way */
651   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   /* do local part */
653   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
654   /* receive remote parts: note this assumes the values are not actually */
655   /* inserted in zz until the next line, which is true for my implementation */
656   /* but is perhaps not always true. */
657   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 /*
662   This only works correctly for square matrices where the subblock A->A is the
663    diagonal block
664 */
665 #undef __FUNCT__
666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
668 {
669   int        ierr;
670   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
671 
672   PetscFunctionBegin;
673   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
674   if (a->rstart != a->cstart || a->rend != a->cend) {
675     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
676   }
677   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatScale_MPIAIJ"
683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
684 {
685   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
690   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatDestroy_MPIAIJ"
696 int MatDestroy_MPIAIJ(Mat mat)
697 {
698   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
699   int        ierr;
700 
701   PetscFunctionBegin;
702 #if defined(PETSC_USE_LOG)
703   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
704 #endif
705   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
706   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
708   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
709 #if defined (PETSC_USE_CTABLE)
710   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
711 #else
712   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
713 #endif
714   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
715   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
716   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
717   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
718   ierr = PetscFree(aij);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
723 extern int MatMPIAIJFactorInfo_Spooles(Mat,PetscViewer);
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
731   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
732   PetscTruth        isdraw,isascii,flg;
733   PetscViewer       sviewer;
734   PetscViewerFormat format;
735 
736   PetscFunctionBegin;
737   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
738   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
739   if (isascii) {
740     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
741     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
742       MatInfo info;
743       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
744       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
746       if (flg) {
747         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
748 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
749       } else {
750         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
751 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
752       }
753       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
755       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
756       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
757       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
758       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
759       PetscFunctionReturn(0);
760     } else if (format == PETSC_VIEWER_ASCII_INFO) {
761       PetscFunctionReturn(0);
762     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
763 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
764       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
765 #endif
766 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
767       ierr = MatMPIAIJFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
768 #endif
769       PetscFunctionReturn(0);
770     }
771   } else if (isdraw) {
772     PetscDraw       draw;
773     PetscTruth isnull;
774     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
775     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
776   }
777 
778   if (size == 1) {
779     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
780     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
781   } else {
782     /* assemble the entire matrix onto first processor. */
783     Mat         A;
784     Mat_SeqAIJ *Aloc;
785     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
786     PetscScalar *a;
787 
788     if (!rank) {
789       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
790     } else {
791       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     }
793     PetscLogObjectParent(mat,A);
794 
795     /* copy over the A part */
796     Aloc = (Mat_SeqAIJ*)aij->A->data;
797     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
798     row = aij->rstart;
799     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
800     for (i=0; i<m; i++) {
801       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
802       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
803     }
804     aj = Aloc->j;
805     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
806 
807     /* copy over the B part */
808     Aloc = (Mat_SeqAIJ*)aij->B->data;
809     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
810     row  = aij->rstart;
811     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
812     ct   = cols;
813     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
814     for (i=0; i<m; i++) {
815       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
816       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
817     }
818     ierr = PetscFree(ct);CHKERRQ(ierr);
819     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     /*
822        Everyone has to call to draw the matrix since the graphics waits are
823        synchronized across all processors that share the PetscDraw object
824     */
825     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
826     if (!rank) {
827       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
828       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
829     }
830     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
831     ierr = MatDestroy(A);CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatView_MPIAIJ"
838 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
839 {
840   int        ierr;
841   PetscTruth isascii,isdraw,issocket,isbinary;
842 
843   PetscFunctionBegin;
844   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
845   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
846   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
847   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
848   if (isascii || isdraw || isbinary || issocket) {
849     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
850   } else {
851     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
852   }
853   PetscFunctionReturn(0);
854 }
855 
856 
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatRelax_MPIAIJ"
860 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
861 {
862   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
863   int          ierr;
864   Vec          bb1;
865   PetscScalar  mone=-1.0;
866 
867   PetscFunctionBegin;
868   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
869 
870   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
871 
872   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
873     if (flag & SOR_ZERO_INITIAL_GUESS) {
874       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
875       its--;
876     }
877 
878     while (its--) {
879       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881 
882       /* update rhs: bb1 = bb - B*x */
883       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
884       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
885 
886       /* local sweep */
887       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
888       CHKERRQ(ierr);
889     }
890   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
893       its--;
894     }
895     while (its--) {
896       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898 
899       /* update rhs: bb1 = bb - B*x */
900       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
901       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
902 
903       /* local sweep */
904       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
905       CHKERRQ(ierr);
906     }
907   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
908     if (flag & SOR_ZERO_INITIAL_GUESS) {
909       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
910       its--;
911     }
912     while (its--) {
913       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
914       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
915 
916       /* update rhs: bb1 = bb - B*x */
917       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
918       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
919 
920       /* local sweep */
921       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
922       CHKERRQ(ierr);
923     }
924   } else {
925     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
926   }
927 
928   ierr = VecDestroy(bb1);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
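/*
   Each outer iteration of the local relaxation above computes

     bb1 = bb - B_o*lvec     (off-process coupling moved to the right-hand side)
     xx  = relax(A_d,bb1)    (lits SOR sweeps on the diagonal block)

   i.e. block Jacobi between processes with SOR inside each block; a true
   parallel SOR ordering is not supported.
*/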
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatGetInfo_MPIAIJ"
934 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
935 {
936   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
937   Mat        A = mat->A,B = mat->B;
938   int        ierr;
939   PetscReal  isend[5],irecv[5];
940 
941   PetscFunctionBegin;
942   info->block_size     = 1.0;
943   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
944   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
945   isend[3] = info->memory;  isend[4] = info->mallocs;
946   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
947   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
948   isend[3] += info->memory;  isend[4] += info->mallocs;
949   if (flag == MAT_LOCAL) {
950     info->nz_used      = isend[0];
951     info->nz_allocated = isend[1];
952     info->nz_unneeded  = isend[2];
953     info->memory       = isend[3];
954     info->mallocs      = isend[4];
955   } else if (flag == MAT_GLOBAL_MAX) {
956     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
957     info->nz_used      = irecv[0];
958     info->nz_allocated = irecv[1];
959     info->nz_unneeded  = irecv[2];
960     info->memory       = irecv[3];
961     info->mallocs      = irecv[4];
962   } else if (flag == MAT_GLOBAL_SUM) {
963     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
964     info->nz_used      = irecv[0];
965     info->nz_allocated = irecv[1];
966     info->nz_unneeded  = irecv[2];
967     info->memory       = irecv[3];
968     info->mallocs      = irecv[4];
969   }
970   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
971   info->fill_ratio_needed = 0;
972   info->factor_mallocs    = 0;
973   info->rows_global       = (double)matin->M;
974   info->columns_global    = (double)matin->N;
975   info->rows_local        = (double)matin->m;
976   info->columns_local     = (double)matin->N;
977 
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatSetOption_MPIAIJ"
983 int MatSetOption_MPIAIJ(Mat A,MatOption op)
984 {
985   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
986   int        ierr;
987 
988   PetscFunctionBegin;
989   switch (op) {
990   case MAT_NO_NEW_NONZERO_LOCATIONS:
991   case MAT_YES_NEW_NONZERO_LOCATIONS:
992   case MAT_COLUMNS_UNSORTED:
993   case MAT_COLUMNS_SORTED:
994   case MAT_NEW_NONZERO_ALLOCATION_ERR:
995   case MAT_KEEP_ZEROED_ROWS:
996   case MAT_NEW_NONZERO_LOCATION_ERR:
997   case MAT_USE_INODES:
998   case MAT_DO_NOT_USE_INODES:
999   case MAT_IGNORE_ZERO_ENTRIES:
1000     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1001     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1002     break;
1003   case MAT_ROW_ORIENTED:
1004     a->roworiented = PETSC_TRUE;
1005     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1006     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1007     break;
1008   case MAT_ROWS_SORTED:
1009   case MAT_ROWS_UNSORTED:
1010   case MAT_YES_NEW_DIAGONALS:
1011   case MAT_USE_SINGLE_PRECISION_SOLVES:
1012     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1013     break;
1014   case MAT_COLUMN_ORIENTED:
1015     a->roworiented = PETSC_FALSE;
1016     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1017     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1018     break;
1019   case MAT_IGNORE_OFF_PROC_ENTRIES:
1020     a->donotstash = PETSC_TRUE;
1021     break;
1022   case MAT_NO_NEW_DIAGONALS:
1023     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1024   default:
1025     SETERRQ(PETSC_ERR_SUP,"unknown option");
1026   }
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatGetRow_MPIAIJ"
1032 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1033 {
1034   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1035   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1036   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1037   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1038   int          *cmap,*idx_p;
1039 
1040   PetscFunctionBegin;
1041   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1042   mat->getrowactive = PETSC_TRUE;
1043 
1044   if (!mat->rowvalues && (idx || v)) {
1045     /*
1046         allocate enough space to hold information from the longest row.
1047     */
1048     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1049     int     max = 1,tmp;
1050     for (i=0; i<matin->m; i++) {
1051       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1052       if (max < tmp) { max = tmp; }
1053     }
1054     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1055     mat->rowindices = (int*)(mat->rowvalues + max);
1056   }
1057 
1058   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1059   lrow = row - rstart;
1060 
1061   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1062   if (!v)   {pvA = 0; pvB = 0;}
1063   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1064   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1065   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1066   nztot = nzA + nzB;
1067 
1068   cmap  = mat->garray;
1069   if (v  || idx) {
1070     if (nztot) {
1071       /* Sort by increasing column numbers, assuming A and B already sorted */
1072       int imark = -1;
1073       if (v) {
1074         *v = v_p = mat->rowvalues;
1075         for (i=0; i<nzB; i++) {
1076           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1077           else break;
1078         }
1079         imark = i;
1080         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1081         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1082       }
1083       if (idx) {
1084         *idx = idx_p = mat->rowindices;
1085         if (imark > -1) {
1086           for (i=0; i<imark; i++) {
1087             idx_p[i] = cmap[cworkB[i]];
1088           }
1089         } else {
1090           for (i=0; i<nzB; i++) {
1091             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1092             else break;
1093           }
1094           imark = i;
1095         }
1096         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1097         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1098       }
1099     } else {
1100       if (idx) *idx = 0;
1101       if (v)   *v   = 0;
1102     }
1103   }
1104   *nz = nztot;
1105   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1106   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
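/*
   The row returned above is the merge, in increasing global column order, of
   three already-sorted pieces: the off-diagonal (B) entries whose global
   column (through garray) lies below cstart, then the diagonal-block (A)
   entries offset by cstart, then the remaining off-diagonal entries.  The
   rowvalues/rowindices work arrays are sized once for the longest local row
   and reused on every call.
*/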
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1112 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1113 {
1114   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1115 
1116   PetscFunctionBegin;
1117   if (aij->getrowactive == PETSC_FALSE) {
1118     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1119   }
1120   aij->getrowactive = PETSC_FALSE;
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNCT__
1125 #define __FUNCT__ "MatNorm_MPIAIJ"
1126 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1127 {
1128   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1129   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1130   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1131   PetscReal    sum = 0.0;
1132   PetscScalar  *v;
1133 
1134   PetscFunctionBegin;
1135   if (aij->size == 1) {
1136     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1137   } else {
1138     if (type == NORM_FROBENIUS) {
1139       v = amat->a;
1140       for (i=0; i<amat->nz; i++) {
1141 #if defined(PETSC_USE_COMPLEX)
1142         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1143 #else
1144         sum += (*v)*(*v); v++;
1145 #endif
1146       }
1147       v = bmat->a;
1148       for (i=0; i<bmat->nz; i++) {
1149 #if defined(PETSC_USE_COMPLEX)
1150         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1151 #else
1152         sum += (*v)*(*v); v++;
1153 #endif
1154       }
1155       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1156       *norm = sqrt(*norm);
1157     } else if (type == NORM_1) { /* max column norm */
1158       PetscReal *tmp,*tmp2;
1159       int    *jj,*garray = aij->garray;
1160       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1161       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1162       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1163       *norm = 0.0;
1164       v = amat->a; jj = amat->j;
1165       for (j=0; j<amat->nz; j++) {
1166         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1167       }
1168       v = bmat->a; jj = bmat->j;
1169       for (j=0; j<bmat->nz; j++) {
1170         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1171       }
1172       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1173       for (j=0; j<mat->N; j++) {
1174         if (tmp2[j] > *norm) *norm = tmp2[j];
1175       }
1176       ierr = PetscFree(tmp);CHKERRQ(ierr);
1177       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1178     } else if (type == NORM_INFINITY) { /* max row norm */
1179       PetscReal ntemp = 0.0;
1180       for (j=0; j<aij->A->m; j++) {
1181         v = amat->a + amat->i[j] + shift;
1182         sum = 0.0;
1183         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1184           sum += PetscAbsScalar(*v); v++;
1185         }
1186         v = bmat->a + bmat->i[j] + shift;
1187         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1188           sum += PetscAbsScalar(*v); v++;
1189         }
1190         if (sum > ntemp) ntemp = sum;
1191       }
1192       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1193     } else {
1194       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1195     }
1196   }
1197   PetscFunctionReturn(0);
1198 }
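/*
   The norms combine the local A and B parts: the Frobenius norm sums squared
   moduli locally and reduces with MPI_SUM before taking the square root, the
   1-norm accumulates absolute column sums into a length-N array, reduces it
   with MPI_SUM and takes the maximum, and the infinity norm reduces the local
   maximum row sum with MPI_MAX.  The 2-norm is not supported.
*/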
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatTranspose_MPIAIJ"
1202 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1203 {
1204   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1205   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1206   int          ierr,shift = Aloc->indexshift;
1207   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1208   Mat          B;
1209   PetscScalar  *array;
1210 
1211   PetscFunctionBegin;
1212   if (!matout && M != N) {
1213     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1214   }
1215 
1216   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1217 
1218   /* copy over the A part */
1219   Aloc = (Mat_SeqAIJ*)a->A->data;
1220   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1221   row = a->rstart;
1222   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1223   for (i=0; i<m; i++) {
1224     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1225     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1226   }
1227   aj = Aloc->j;
1228   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1229 
1230   /* copy over the B part */
1231   Aloc = (Mat_SeqAIJ*)a->B->data;
1232   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1233   row  = a->rstart;
1234   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1235   ct   = cols;
1236   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1237   for (i=0; i<m; i++) {
1238     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1239     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1240   }
1241   ierr = PetscFree(ct);CHKERRQ(ierr);
1242   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1243   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1244   if (matout) {
1245     *matout = B;
1246   } else {
1247     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1254 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1255 {
1256   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257   Mat        a = aij->A,b = aij->B;
1258   int        ierr,s1,s2,s3;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1262   if (rr) {
1263     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1264     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1265     /* Overlap communication with computation. */
1266     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1267   }
1268   if (ll) {
1269     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1270     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1271     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1272   }
1273   /* scale  the diagonal block */
1274   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1275 
1276   if (rr) {
1277     /* Do a scatter end and then right scale the off-diagonal block */
1278     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1279     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1280   }
1281 
1282   PetscFunctionReturn(0);
1283 }
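/*
   The scaling above is ordered so that communication is hidden: the scatter
   of rr into aij->lvec is started first, the left scaling of the off-diagonal
   block and both scalings of the diagonal block proceed meanwhile, and only
   after the scatter completes is the off-diagonal block right-scaled with the
   gathered entries of rr.
*/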
1284 
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1288 int MatPrintHelp_MPIAIJ(Mat A)
1289 {
1290   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1291   int        ierr;
1292 
1293   PetscFunctionBegin;
1294   if (!a->rank) {
1295     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1296   }
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1302 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1303 {
1304   PetscFunctionBegin;
1305   *bs = 1;
1306   PetscFunctionReturn(0);
1307 }
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1310 int MatSetUnfactored_MPIAIJ(Mat A)
1311 {
1312   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1313   int        ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatEqual_MPIAIJ"
1322 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1323 {
1324   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1325   Mat        a,b,c,d;
1326   PetscTruth flg;
1327   int        ierr;
1328 
1329   PetscFunctionBegin;
1330   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1331   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1332   a = matA->A; b = matA->B;
1333   c = matB->A; d = matB->B;
1334 
1335   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1336   if (flg == PETSC_TRUE) {
1337     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1338   }
1339   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPIAIJ"
1345 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   int        ierr;
1348   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1349   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1350   PetscTruth flg;
1351 
1352   PetscFunctionBegin;
1353   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1354   if (str != SAME_NONZERO_PATTERN || !flg) {
1355     /* because of the column compression in the off-processor part of the matrix a->B,
1356        the number of columns in a->B and b->B may be different, hence we cannot call
1357        the MatCopy() directly on the two parts. If need be, we can provide a more
1358        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1359        then copying the submatrices */
1360     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1361   } else {
1362     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1363     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1370 int MatSetUpPreallocation_MPIAIJ(Mat A)
1371 {
1372   int        ierr;
1373 
1374   PetscFunctionBegin;
1375   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1380 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1381 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1382 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1383 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1384 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1385 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1386 #endif
1387 
1388 #include "petscblaslapack.h"
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatAXPY_MPIAIJ"
1392 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1393 {
1394   int        ierr,one = 1;
1395   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1396   Mat_SeqAIJ *x,*y;
1397 
1398   PetscFunctionBegin;
1399   if (str == SAME_NONZERO_PATTERN) {
1400     x  = (Mat_SeqAIJ *)xx->A->data;
1401     y  = (Mat_SeqAIJ *)yy->A->data;
1402     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1403     x  = (Mat_SeqAIJ *)xx->B->data;
1404     y  = (Mat_SeqAIJ *)yy->B->data;
1405     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1406   } else {
1407     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 /* -------------------------------------------------------------------*/
1413 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1414        MatGetRow_MPIAIJ,
1415        MatRestoreRow_MPIAIJ,
1416        MatMult_MPIAIJ,
1417        MatMultAdd_MPIAIJ,
1418        MatMultTranspose_MPIAIJ,
1419        MatMultTransposeAdd_MPIAIJ,
1420        0,
1421        0,
1422        0,
1423        0,
1424        0,
1425        0,
1426        MatRelax_MPIAIJ,
1427        MatTranspose_MPIAIJ,
1428        MatGetInfo_MPIAIJ,
1429        MatEqual_MPIAIJ,
1430        MatGetDiagonal_MPIAIJ,
1431        MatDiagonalScale_MPIAIJ,
1432        MatNorm_MPIAIJ,
1433        MatAssemblyBegin_MPIAIJ,
1434        MatAssemblyEnd_MPIAIJ,
1435        0,
1436        MatSetOption_MPIAIJ,
1437        MatZeroEntries_MPIAIJ,
1438        MatZeroRows_MPIAIJ,
1439 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1440        MatLUFactorSymbolic_MPIAIJ_TFS,
1441 #else
1442        0,
1443 #endif
1444        0,
1445        0,
1446        0,
1447        MatSetUpPreallocation_MPIAIJ,
1448        0,
1449        0,
1450        0,
1451        0,
1452        MatDuplicate_MPIAIJ,
1453        0,
1454        0,
1455        0,
1456        0,
1457        MatAXPY_MPIAIJ,
1458        MatGetSubMatrices_MPIAIJ,
1459        MatIncreaseOverlap_MPIAIJ,
1460        MatGetValues_MPIAIJ,
1461        MatCopy_MPIAIJ,
1462        MatPrintHelp_MPIAIJ,
1463        MatScale_MPIAIJ,
1464        0,
1465        0,
1466        0,
1467        MatGetBlockSize_MPIAIJ,
1468        0,
1469        0,
1470        0,
1471        0,
1472        MatFDColoringCreate_MPIAIJ,
1473        0,
1474        MatSetUnfactored_MPIAIJ,
1475        0,
1476        0,
1477        MatGetSubMatrix_MPIAIJ,
1478        MatDestroy_MPIAIJ,
1479        MatView_MPIAIJ,
1480        MatGetPetscMaps_Petsc,
1481        0,
1482        0,
1483        0,
1484        0,
1485        0,
1486        0,
1487        0,
1488        0,
1489        MatSetColoring_MPIAIJ,
1490        MatSetValuesAdic_MPIAIJ,
1491        MatSetValuesAdifor_MPIAIJ
1492 };
1493 
1494 /* ----------------------------------------------------------------------------------------*/
1495 
1496 EXTERN_C_BEGIN
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1499 int MatStoreValues_MPIAIJ(Mat mat)
1500 {
1501   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1502   int        ierr;
1503 
1504   PetscFunctionBegin;
1505   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1506   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 EXTERN_C_END
1510 
1511 EXTERN_C_BEGIN
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1514 int MatRetrieveValues_MPIAIJ(Mat mat)
1515 {
1516   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1517   int        ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1521   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 EXTERN_C_END
1525 
1526 #include "petscpc.h"
1527 EXTERN_C_BEGIN
1528 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1529 EXTERN_C_END
1530 
1531 EXTERN_C_BEGIN
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatCreate_MPIAIJ"
1534 int MatCreate_MPIAIJ(Mat B)
1535 {
1536   Mat_MPIAIJ *b;
1537   int        ierr,i,size;
1538 #if defined(PETSC_HAVE_SUPERLUDIST)
1539   PetscTruth flg;
1540 #endif
1541 
1542 
1543   PetscFunctionBegin;
1544   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1545 
1546   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1547   B->data         = (void*)b;
1548   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1549   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1550   B->factor       = 0;
1551   B->assembled    = PETSC_FALSE;
1552   B->mapping      = 0;
1553 
1554   B->insertmode      = NOT_SET_VALUES;
1555   b->size            = size;
1556   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1557 
1558   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1559   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1560 
1561   /* the information in the maps duplicates the information computed below, eventually
1562      we should remove the duplicate information that is not contained in the maps */
1563   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1564   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1565 
1566   /* build local table of row and column ownerships */
1567   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1568   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1569   b->cowners = b->rowners + b->size + 2;
1570   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1571   b->rowners[0] = 0;
1572   for (i=2; i<=b->size; i++) {
1573     b->rowners[i] += b->rowners[i-1];
1574   }
1575   b->rstart = b->rowners[b->rank];
1576   b->rend   = b->rowners[b->rank+1];
1577   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1578   b->cowners[0] = 0;
1579   for (i=2; i<=b->size; i++) {
1580     b->cowners[i] += b->cowners[i-1];
1581   }
1582   b->cstart = b->cowners[b->rank];
1583   b->cend   = b->cowners[b->rank+1];
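  /* rowners[] and cowners[] now hold running sums of the local sizes; e.g. with three
     processes owning 3,3,2 rows, rowners is 0,3,6,8 and process r owns global rows
     rowners[r] through rowners[r+1]-1 (similarly for columns with cowners) */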
1584 
1585   /* build cache for off array entries formed */
1586   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1587   b->donotstash  = PETSC_FALSE;
1588   b->colmap      = 0;
1589   b->garray      = 0;
1590   b->roworiented = PETSC_TRUE;
1591 
1592   /* stuff used for matrix vector multiply */
1593   b->lvec      = PETSC_NULL;
1594   b->Mvctx     = PETSC_NULL;
1595 
1596   /* stuff for MatGetRow() */
1597   b->rowindices   = 0;
1598   b->rowvalues    = 0;
1599   b->getrowactive = PETSC_FALSE;
1600   /*
1601 #if defined(PETSC_HAVE_SUPERLUDIST)
1602   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1603   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1604 #endif
1605   */
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1607                                      "MatStoreValues_MPIAIJ",
1608                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1610                                      "MatRetrieveValues_MPIAIJ",
1611                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1613                                      "MatGetDiagonalBlock_MPIAIJ",
1614                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1615 
1616   PetscFunctionReturn(0);
1617 }
1618 EXTERN_C_END
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1622 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1623 {
1624   Mat        mat;
1625   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1626   int        ierr;
1627 
1628   PetscFunctionBegin;
1629   *newmat       = 0;
1630   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1631   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1632   a    = (Mat_MPIAIJ*)mat->data;
1633   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1634   mat->factor       = matin->factor;
1635   mat->assembled    = PETSC_TRUE;
1636   mat->insertmode   = NOT_SET_VALUES;
1637   mat->preallocated = PETSC_TRUE;
1638 
1639   a->rstart       = oldmat->rstart;
1640   a->rend         = oldmat->rend;
1641   a->cstart       = oldmat->cstart;
1642   a->cend         = oldmat->cend;
1643   a->size         = oldmat->size;
1644   a->rank         = oldmat->rank;
1645   a->donotstash   = oldmat->donotstash;
1646   a->roworiented  = oldmat->roworiented;
1647   a->rowindices   = 0;
1648   a->rowvalues    = 0;
1649   a->getrowactive = PETSC_FALSE;
1650 
1651   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1652   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1653   if (oldmat->colmap) {
1654 #if defined (PETSC_USE_CTABLE)
1655     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1656 #else
1657     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1658     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1659     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1660 #endif
1661   } else a->colmap = 0;
1662   if (oldmat->garray) {
1663     int len;
1664     len  = oldmat->B->n;
1665     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1666     PetscLogObjectMemory(mat,len*sizeof(int));
1667     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1668   } else a->garray = 0;
1669 
1670   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1671   PetscLogObjectParent(mat,a->lvec);
1672   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1673   PetscLogObjectParent(mat,a->Mvctx);
1674   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1675   PetscLogObjectParent(mat,a->A);
1676   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1677   PetscLogObjectParent(mat,a->B);
1678   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1679   *newmat = mat;
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #include "petscsys.h"
1684 
1685 EXTERN_C_BEGIN
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatLoad_MPIAIJ"
1688 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1689 {
1690   Mat          A;
1691   PetscScalar  *vals,*svals;
1692   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1693   MPI_Status   status;
1694   int          i,nz,ierr,j,rstart,rend,fd;
1695   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1696   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1697   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1698 
1699   PetscFunctionBegin;
1700   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1701   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1702   if (!rank) {
1703     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1704     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1705     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1706     if (header[3] < 0) {
1707       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1708     }
1709   }
1710 
1711   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1712   M = header[1]; N = header[2];
1713   /* determine ownership of all rows */
1714   m = M/size + ((M % size) > rank);
1715   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1716   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1717   rowners[0] = 0;
1718   for (i=2; i<=size; i++) {
1719     rowners[i] += rowners[i-1];
1720   }
1721   rstart = rowners[rank];
1722   rend   = rowners[rank+1];
1723 
1724   /* distribute row lengths to all processors */
1725   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1726   offlens = ourlens + (rend-rstart);
1727   if (!rank) {
1728     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1729     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1730     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1731     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1732     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1733     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1734   } else {
1735     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1736   }
1737 
1738   if (!rank) {
1739     /* calculate the number of nonzeros on each processor */
1740     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1741     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1742     for (i=0; i<size; i++) {
1743       for (j=rowners[i]; j< rowners[i+1]; j++) {
1744         procsnz[i] += rowlengths[j];
1745       }
1746     }
1747     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1748 
1749     /* determine max buffer needed and allocate it */
1750     maxnz = 0;
1751     for (i=0; i<size; i++) {
1752       maxnz = PetscMax(maxnz,procsnz[i]);
1753     }
1754     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1755 
1756     /* read in my part of the matrix column indices  */
1757     nz   = procsnz[0];
1758     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1759     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1760 
1761     /* read in everyone else's part and ship it off */
1762     for (i=1; i<size; i++) {
1763       nz   = procsnz[i];
1764       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1765       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1766     }
1767     ierr = PetscFree(cols);CHKERRQ(ierr);
1768   } else {
1769     /* determine buffer space needed for message */
1770     nz = 0;
1771     for (i=0; i<m; i++) {
1772       nz += ourlens[i];
1773     }
1774     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1775 
1776     /* receive message of column indices */
1777     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1778     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1779     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1780   }
1781 
1782   /* determine column ownership if matrix is not square */
1783   if (N != M) {
1784     n      = N/size + ((N % size) > rank);
1785     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1786     cstart = cend - n;
1787   } else {
1788     cstart = rstart;
1789     cend   = rend;
1790     n      = cend - cstart;
1791   }
1792 
1793   /* loop over local rows, determining number of off diagonal entries */
1794   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1795   jj = 0;
1796   for (i=0; i<m; i++) {
1797     for (j=0; j<ourlens[i]; j++) {
1798       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1799       jj++;
1800     }
1801   }
1802 
1803   /* create our matrix */
1804   for (i=0; i<m; i++) {
1805     ourlens[i] -= offlens[i];
1806   }
1807   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1808   A = *newmat;
1809   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1810   for (i=0; i<m; i++) {
1811     ourlens[i] += offlens[i];
1812   }
1813 
1814   if (!rank) {
1815     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1816 
1817     /* read in my part of the matrix numerical values  */
1818     nz   = procsnz[0];
1819     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1820 
1821     /* insert into matrix */
1822     jj      = rstart;
1823     smycols = mycols;
1824     svals   = vals;
1825     for (i=0; i<m; i++) {
1826       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1827       smycols += ourlens[i];
1828       svals   += ourlens[i];
1829       jj++;
1830     }
1831 
1832     /* read in other processors and ship out */
1833     for (i=1; i<size; i++) {
1834       nz   = procsnz[i];
1835       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1836       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1837     }
1838     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1839   } else {
1840     /* receive numeric values */
1841     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1842 
1843     /* receive message of values */
1844     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1845     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1846     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1847 
1848     /* insert into matrix */
1849     jj      = rstart;
1850     smycols = mycols;
1851     svals   = vals;
1852     for (i=0; i<m; i++) {
1853       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1854       smycols += ourlens[i];
1855       svals   += ourlens[i];
1856       jj++;
1857     }
1858   }
1859   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1860   ierr = PetscFree(vals);CHKERRQ(ierr);
1861   ierr = PetscFree(mycols);CHKERRQ(ierr);
1862   ierr = PetscFree(rowners);CHKERRQ(ierr);
1863 
1864   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1865   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 EXTERN_C_END
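
/*
   A minimal usage sketch for the loader above, for user code that includes petscmat.h.
   The file name "matrix.dat" is illustrative, and the read-only mode flag of
   PetscViewerBinaryOpen() is assumed to be PETSC_BINARY_RDONLY in this release.

.vb
     PetscViewer viewer;
     Mat         A;
     int         ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/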
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1872 /*
1873     Not great since it makes two copies of the submatrix: first a SeqAIJ
1874   copy on each process, and then the end result built by concatenating the
1875   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1876 */
1877 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1878 {
1879   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1880   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1881   Mat          *local,M,Mreuse;
1882   PetscScalar  *vwork,*aa;
1883   MPI_Comm     comm = mat->comm;
1884   Mat_SeqAIJ   *aij;
1885 
1886 
1887   PetscFunctionBegin;
1888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1889   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1890 
1891   if (call ==  MAT_REUSE_MATRIX) {
1892     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1893     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1894     local = &Mreuse;
1895     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1896   } else {
1897     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1898     Mreuse = *local;
1899     ierr   = PetscFree(local);CHKERRQ(ierr);
1900   }
1901 
1902   /*
1903       m - number of local rows
1904       n - number of columns (same on all processors)
1905       rstart - first row in new global matrix generated
1906   */
1907   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1908   if (call == MAT_INITIAL_MATRIX) {
1909     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1910     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1911     ii  = aij->i;
1912     jj  = aij->j;
1913 
1914     /*
1915         Determine the number of non-zeros in the diagonal and off-diagonal
1916         portions of the matrix in order to do correct preallocation
1917     */
1918 
1919     /* first get start and end of "diagonal" columns */
1920     if (csize == PETSC_DECIDE) {
1921       nlocal = n/size + ((n % size) > rank);
1922     } else {
1923       nlocal = csize;
1924     }
1925     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1926     rstart = rend - nlocal;
1927     if (rank == size - 1 && rend != n) {
1928       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1929     }
1930 
1931     /* next, compute all the lengths */
1932     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1933     olens = dlens + m;
1934     for (i=0; i<m; i++) {
1935       jend = ii[i+1] - ii[i];
1936       olen = 0;
1937       dlen = 0;
1938       for (j=0; j<jend; j++) {
1939         if (*jj < rstart || *jj >= rend) olen++;
1940         else dlen++;
1941         jj++;
1942       }
1943       olens[i] = olen;
1944       dlens[i] = dlen;
1945     }
1946     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1947     ierr = PetscFree(dlens);CHKERRQ(ierr);
1948   } else {
1949     int ml,nl;
1950 
1951     M = *newmat;
1952     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1953     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1954     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1955     /*
1956          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1957        rather than the slower MatSetValues().
1958     */
1959     M->was_assembled = PETSC_TRUE;
1960     M->assembled     = PETSC_FALSE;
1961   }
1962   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1963   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1964   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1965   ii  = aij->i;
1966   jj  = aij->j;
1967   aa  = aij->a;
1968   for (i=0; i<m; i++) {
1969     row   = rstart + i;
1970     nz    = ii[i+1] - ii[i];
1971     cwork = jj;     jj += nz;
1972     vwork = aa;     aa += nz;
1973     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1974   }
1975 
1976   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1977   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1978   *newmat = M;
1979 
1980   /* save submatrix used in processor for next request */
1981   if (call ==  MAT_INITIAL_MATRIX) {
1982     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1983     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1984   }
1985 
1986   PetscFunctionReturn(0);
1987 }
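
/*
   A minimal usage sketch for the routine above; the sizes nlocal, first and M are
   assumed to be set by the caller.  Each process requests the rows
   first..first+nlocal-1 and all M columns, letting PETSC_DECIDE choose the column
   layout of the result.

.vb
     IS  isrow,iscol;
     Mat sub;
     int ierr;

     ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,M,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
.ve
*/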
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1991 /*@C
1992    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in
1993    AIJ format (the default parallel PETSc format).  For good matrix assembly performance
1994    the user should preallocate the matrix storage by setting the parameters
1995    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1996    performance can be increased by more than a factor of 50.
1997 
1998    Collective on MPI_Comm
1999 
2000    Input Parameters:
2001 +  A - the matrix
2002 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2003            (same value is used for all local rows)
2004 .  d_nnz - array containing the number of nonzeros in the various rows of the
2005            DIAGONAL portion of the local submatrix (possibly different for each row)
2006            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2007            The size of this array is equal to the number of local rows, i.e., 'm'.
2008            You must leave room for the diagonal entry even if it is zero.
2009 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2010            submatrix (same value is used for all local rows).
2011 -  o_nnz - array containing the number of nonzeros in the various rows of the
2012            OFF-DIAGONAL portion of the local submatrix (possibly different for
2013            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2014            structure. The size of this array is equal to the number
2015            of local rows, i.e., 'm'.
2016 
2017    The AIJ format (also called the Yale sparse matrix format or
2018    compressed row storage) is fully compatible with standard Fortran 77
2019    storage.  That is, the stored row and column indices can begin at
2020    either one (as in Fortran) or zero.  See the users manual for details.
2021 
2022    The user MUST specify either the local or global matrix dimensions
2023    (possibly both).
2024 
2025    The parallel matrix is partitioned such that the first m0 rows belong to
2026    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2027    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2028 
2029    The DIAGONAL portion of the local submatrix of a processor can be defined
2030    as the submatrix which is obtained by extracting the part corresponding
2031    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2032    first row that belongs to the processor, and r2 is the last row belonging
2033    to this processor. This is a square mxm matrix. The remaining portion
2034    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2035 
2036    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2037 
2038    By default, this format uses inodes (identical nodes) when possible.
2039    We search for consecutive rows with the same nonzero structure, thereby
2040    reusing matrix information to achieve increased efficiency.
2041 
2042    Options Database Keys:
2043 +  -mat_aij_no_inode  - Do not use inodes
2044 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2045 -  -mat_aij_oneindex - Internally use indexing starting at 1
2046         rather than 0.  Note that when calling MatSetValues(),
2047         the user still MUST index entries starting at 0!
2048 
2049    Example usage:
2050 
2051    Consider the following 8x8 matrix with 34 non-zero values that is
2052    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2053    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2054    as follows:
2055 
2056 .vb
2057             1  2  0  |  0  3  0  |  0  4
2058     Proc0   0  5  6  |  7  0  0  |  8  0
2059             9  0 10  | 11  0  0  | 12  0
2060     -------------------------------------
2061            13  0 14  | 15 16 17  |  0  0
2062     Proc1   0 18  0  | 19 20 21  |  0  0
2063             0  0  0  | 22 23  0  | 24  0
2064     -------------------------------------
2065     Proc2  25 26 27  |  0  0 28  | 29  0
2066            30  0  0  | 31 32 33  |  0 34
2067 .ve
2068 
2069    This can be represented as a collection of submatrices as:
2070 
2071 .vb
2072       A B C
2073       D E F
2074       G H I
2075 .ve
2076 
2077    Where the submatrices A,B,C are owned by proc0, D,E,F are
2078    owned by proc1, G,H,I are owned by proc2.
2079 
2080    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2081    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2082    The 'M','N' parameters are 8,8, and have the same values on all procs.
2083 
2084    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2085    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2086    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2087    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2088    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2089    matrix, and [DF] as another SeqAIJ matrix.
2090 
2091    When d_nz, o_nz parameters are specified, d_nz storage elements are
2092    allocated for every row of the local diagonal submatrix, and o_nz
2093    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2094    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2095    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2096    In this case, the values of d_nz,o_nz are:
2097 .vb
2098      proc0 : d_nz = 2, o_nz = 2
2099      proc1 : d_nz = 3, o_nz = 2
2100      proc2 : d_nz = 1, o_nz = 4
2101 .ve
2102    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2103    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2104    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2105    34 values.
2106 
2107    When d_nnz, o_nnz parameters are specified, the storage is specified
2108    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2109    In the above case the values for d_nnz,o_nnz are:
2110 .vb
2111      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2112      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2113      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2114 .ve
2115    Here the space allocated is the sum of all the above values, i.e., 34, and
2116    hence the preallocation is perfect.
2117 
2118    Level: intermediate
2119 
2120 .keywords: matrix, aij, compressed row, sparse, parallel
2121 
2122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2123 @*/
2124 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2125 {
2126   Mat_MPIAIJ   *b;
2127   int          ierr,i;
2128   PetscTruth   flg2;
2129 
2130   PetscFunctionBegin;
2131   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2132   if (!flg2) PetscFunctionReturn(0);
2133   B->preallocated = PETSC_TRUE;
2134   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2135   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2136   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2137   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2138   if (d_nnz) {
2139     for (i=0; i<B->m; i++) {
2140       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2141     }
2142   }
2143   if (o_nnz) {
2144     for (i=0; i<B->m; i++) {
2145       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2146     }
2147   }
2148   b = (Mat_MPIAIJ*)B->data;
2149 
2150   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2151   PetscLogObjectParent(B,b->A);
2152   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2153   PetscLogObjectParent(B,b->B);
2154 
2155   PetscFunctionReturn(0);
2156 }
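
/*
   A minimal sketch of the two-step creation path ending in the routine above, using
   the per-row counts of proc0 from the 8x8 example in the manual page (m = n = 3,
   M = N = 8, d_nnz = {2,2,2}, o_nnz = {2,2,2}); each process would pass its own
   local sizes and counts.

.vb
     Mat A;
     int ierr,d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/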
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatCreateMPIAIJ"
2160 /*@C
2161    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2162    (the default parallel PETSc format).  For good matrix assembly performance
2163    the user should preallocate the matrix storage by setting the parameters
2164    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2165    performance can be increased by more than a factor of 50.
2166 
2167    Collective on MPI_Comm
2168 
2169    Input Parameters:
2170 +  comm - MPI communicator
2171 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2172            This value should be the same as the local size used in creating the
2173            y vector for the matrix-vector product y = Ax.
2174 .  n - This value should be the same as the local size used in creating the
2175        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2176        calculated if N is given) For square matrices n is almost always m.
2177 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2178 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2179 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2180            (same value is used for all local rows)
2181 .  d_nnz - array containing the number of nonzeros in the various rows of the
2182            DIAGONAL portion of the local submatrix (possibly different for each row)
2183            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2184            The size of this array is equal to the number of local rows, i.e., 'm'.
2185            You must leave room for the diagonal entry even if it is zero.
2186 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2187            submatrix (same value is used for all local rows).
2188 -  o_nnz - array containing the number of nonzeros in the various rows of the
2189            OFF-DIAGONAL portion of the local submatrix (possibly different for
2190            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2191            structure. The size of this array is equal to the number
2192            of local rows, i.e., 'm'.
2193 
2194    Output Parameter:
2195 .  A - the matrix
2196 
2197    Notes:
2198    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2199    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2200    storage requirements for this matrix.
2201 
2202    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2203    processor then it must be used on all processors that share the object for
2204    that argument.
2205 
2206    The AIJ format (also called the Yale sparse matrix format or
2207    compressed row storage), is fully compatible with standard Fortran 77
2208    compressed row storage) is fully compatible with standard Fortran 77
2209    either one (as in Fortran) or zero.  See the users manual for details.
2210 
2211    The user MUST specify either the local or global matrix dimensions
2212    (possibly both).
2213 
2214    The parallel matrix is partitioned such that the first m0 rows belong to
2215    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2216    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2217 
2218    The DIAGONAL portion of the local submatrix of a processor can be defined
2219    as the submatrix which is obtained by extracting the part corresponding
2220    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2221    first row that belongs to the processor, and r2 is the last row belonging
2222    to this processor. This is a square mxm matrix. The remaining portion
2223    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2224 
2225    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2226 
2227    By default, this format uses inodes (identical nodes) when possible.
2228    We search for consecutive rows with the same nonzero structure, thereby
2229    reusing matrix information to achieve increased efficiency.
2230 
2231    Options Database Keys:
2232 +  -mat_aij_no_inode  - Do not use inodes
2233 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2234 -  -mat_aij_oneindex - Internally use indexing starting at 1
2235         rather than 0.  Note that when calling MatSetValues(),
2236         the user still MUST index entries starting at 0!
2237 
2238 
2239    Example usage:
2240 
2241    Consider the following 8x8 matrix with 34 non-zero values that is
2242    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2243    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2244    as follows:
2245 
2246 .vb
2247             1  2  0  |  0  3  0  |  0  4
2248     Proc0   0  5  6  |  7  0  0  |  8  0
2249             9  0 10  | 11  0  0  | 12  0
2250     -------------------------------------
2251            13  0 14  | 15 16 17  |  0  0
2252     Proc1   0 18  0  | 19 20 21  |  0  0
2253             0  0  0  | 22 23  0  | 24  0
2254     -------------------------------------
2255     Proc2  25 26 27  |  0  0 28  | 29  0
2256            30  0  0  | 31 32 33  |  0 34
2257 .ve
2258 
2259    This can be represented as a collection of submatrices as:
2260 
2261 .vb
2262       A B C
2263       D E F
2264       G H I
2265 .ve
2266 
2267    Where the submatrices A,B,C are owned by proc0, D,E,F are
2268    owned by proc1, G,H,I are owned by proc2.
2269 
2270    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2271    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2272    The 'M','N' parameters are 8,8, and have the same values on all procs.
2273 
2274    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2275    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2276    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2277    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2278    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2279    matrix, and [DF] as another SeqAIJ matrix.
2280 
2281    When d_nz, o_nz parameters are specified, d_nz storage elements are
2282    allocated for every row of the local diagonal submatrix, and o_nz
2283    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2284    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2285    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2286    In this case, the values of d_nz,o_nz are:
2287 .vb
2288      proc0 : d_nz = 2, o_nz = 2
2289      proc1 : d_nz = 3, o_nz = 2
2290      proc2 : d_nz = 1, o_nz = 4
2291 .ve
2292    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2293    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2294    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2295    34 values.
2296 
2297    When d_nnz, o_nnz parameters are specified, the storage is specified
2298    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2299    In the above case the values for d_nnz,o_nnz are:
2300 .vb
2301      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2302      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2303      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2304 .ve
2305    Here the space allocated is the sum of all the above values, i.e., 34, and
2306    hence the preallocation is perfect.
2307 
2308    Level: intermediate
2309 
2310 .keywords: matrix, aij, compressed row, sparse, parallel
2311 
2312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2313 @*/
2314 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2315 {
2316   int ierr,size;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2320   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2321   if (size > 1) {
2322     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2323     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2324   } else {
2325     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2326     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2327   }
2328   PetscFunctionReturn(0);
2329 }
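
/*
   A minimal sketch of calling the routine above directly, again with the proc0 values
   of the 8x8 example from the manual page; the usual MatSetValues(), MatAssemblyBegin()
   and MatAssemblyEnd() calls would follow.

.vb
     Mat A;
     int ierr,d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
*/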
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2333 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2334 {
2335   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2336   PetscFunctionBegin;
2337   *Ad     = a->A;
2338   *Ao     = a->B;
2339   *colmap = a->garray;
2340   PetscFunctionReturn(0);
2341 }
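
/*
   MatMPIAIJGetSeqAIJ() above hands back the two SeqAIJ pieces of the parallel matrix:
   Ad is the square DIAGONAL block, Ao is the column-compressed OFF-DIAGONAL block, and
   colmap is the garray that maps local columns of Ao to global column numbers.  A
   minimal usage sketch:

.vb
     Mat Ad,Ao;
     int ierr,*garray;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
.ve
*/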
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2345 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2346 {
2347   int        ierr;
2348   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2349 
2350   PetscFunctionBegin;
2351   if (coloring->ctype == IS_COLORING_LOCAL) {
2352     int        *allcolors,*colors,i;
2353     ISColoring ocoloring;
2354 
2355     /* set coloring for diagonal portion */
2356     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2357 
2358     /* set coloring for off-diagonal portion */
2359     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2360     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2361     for (i=0; i<a->B->n; i++) {
2362       colors[i] = allcolors[a->garray[i]];
2363     }
2364     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2365     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2366     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2367     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2368   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2369     int        *colors,i,*larray;
2370     ISColoring ocoloring;
2371 
2372     /* set coloring for diagonal portion */
2373     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2374     for (i=0; i<a->A->n; i++) {
2375       larray[i] = i + a->cstart;
2376     }
2377     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2378     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2379     for (i=0; i<a->A->n; i++) {
2380       colors[i] = coloring->colors[larray[i]];
2381     }
2382     ierr = PetscFree(larray);CHKERRQ(ierr);
2383     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2384     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2385     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2386 
2387     /* set coloring for off-diagonal portion */
2388     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2389     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2390     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2391     for (i=0; i<a->B->n; i++) {
2392       colors[i] = coloring->colors[larray[i]];
2393     }
2394     ierr = PetscFree(larray);CHKERRQ(ierr);
2395     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2396     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2397     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2398   } else {
2399     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2400   }
2401 
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2407 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2408 {
2409   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2410   int        ierr;
2411 
2412   PetscFunctionBegin;
2413   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2414   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2420 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2421 {
2422   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2423   int        ierr;
2424 
2425   PetscFunctionBegin;
2426   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2427   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430