xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 73dfda556bb8652509ae9b6cc7adde047a156e57)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
18 at a slightly higher hash-table cost; without it, it is not scalable (each
19 processor holds an order-N integer array), but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
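
/*
   Illustrative sketch (not part of the library source): how the colmap built
   above is queried elsewhere in this file.  Entries are stored shifted by one
   so that a lookup result of zero can mean "this global column has no local
   off-diagonal counterpart"; callers subtract one and test for a negative
   result.  Here gcol stands for a global column number and lcol for the
   resulting local column number.

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol then indicates that gcol does not appear in the local
   off-diagonal block B.
*/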
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
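
/*
   A minimal standalone sketch (illustrative only) of the per-row insertion
   logic the macro above implements: narrow the position with a biased binary
   search, finish with a linear scan, and either update an existing entry or
   shift the tail of the row up one slot and insert.  Unlike the macro, this
   sketch assumes the row still has spare capacity (the macro additionally
   reallocates the whole matrix storage when a row is full).

   static void InsertInSortedRow(int *cols,PetscScalar *vals,int *nrow,
                                 int col,PetscScalar value,InsertMode addv)
   {
     int low = 0,high = *nrow,t,i,ii;
     while (high-low > 5) {
       t = (low+high)/2;
       if (cols[t] > col) high = t;
       else               low  = t;
     }
     for (i=low; i<high; i++) {
       if (cols[i] > col) break;
       if (cols[i] == col) {
         if (addv == ADD_VALUES) vals[i] += value;
         else                    vals[i]  = value;
         return;
       }
     }
     for (ii=*nrow-1; ii>=i; ii--) {
       cols[ii+1] = cols[ii];
       vals[ii+1] = vals[ii];
     }
     cols[i] = col; vals[i] = value; (*nrow)++;
   }
*/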
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required in the macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
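
/*
   A minimal usage sketch (illustrative only, using the same PETSc calling
   sequences that appear elsewhere in this file): values are always passed
   with GLOBAL row and column indices; entries for rows owned by another
   process are stashed by the routine above and exchanged during
   MatAssemblyBegin()/MatAssemblyEnd().

     Mat         A;
     int         ierr,rstart,rend,row,cols[2];
     PetscScalar vals[2];

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,
                            3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       cols[0] = row;          vals[0] =  2.0;
       cols[1] = (row+1) % 10; vals[1] = -1.0;
       ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/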
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled; if so, we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if the nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled, thus we can skip this check
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal, we use separate
532      code so that MatSetValues() is not called for each diagonal entry,
533      which would allocate new memory, causing many mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
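
/*
   A minimal usage sketch (illustrative only): each process lists, with GLOBAL
   indices, the rows it wants zeroed in an IS; rows owned by other processes
   are routed to their owners by the communication in the routine above.
   Passing a non-null diag places that value on the diagonal of each zeroed
   row.  Here each process zeroes its first locally owned row.

     IS          rows;
     int         ierr,rstart,rend;
     PetscScalar one = 1.0;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_SELF,1,&rstart,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/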
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
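
/*
   Structure of the parallel multiply above (an illustrative summary): with
   the locally owned rows split as [A | B], where A acts on the locally owned
   part of x and B acts on the ghost values gathered into lvec, the product is
   formed as

       VecScatterBegin(x -> lvec)       start gathering ghost values of x
       y  = A x_local                   local work overlaps the communication
       VecScatterEnd(x -> lvec)
       y += B lvec

   so the communication of ghost entries is overlapped with the diagonal-block
   multiply.
*/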
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for my implementation */
621   /* but is perhaps not always true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in zz until the next line, which is true for my implementation */
642   /* but is perhaps not always true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     PetscScalar *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Everyone has to call to draw the matrix since the graphics waits are
798        synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 /*
832     This has to provide several versions.
833 
834      2) a) use only local smoothing, updating outer values only once.
835         b) local smoothing, updating outer values each inner iteration.
836      3) coloring, updating outer values between colors.
837 */
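
/*
   For reference, the local sweeps below realize the standard SOR/relaxation
   update on each locally owned row i (a sketch; it assumes SPARSEDENSEMDOT
   subtracts the sparse dot product from its first argument, as its use here
   indicates):

       x_i <- (1 - omega) x_i + omega (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift)

   where the off-diagonal sum uses locally owned x values for columns in the
   diagonal block A and the gathered ghost values in lvec for columns in B.
*/
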
838 #undef __FUNCT__
839 #define __FUNCT__ "MatRelax_MPIAIJ"
840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
841 {
842   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
843   Mat          AA = mat->A,BB = mat->B;
844   Mat_SeqAIJ   *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
845   PetscScalar  *b,*x,*xs,*ls,d,*v,sum;
846   int          ierr,*idx,*diag;
847   int          n = matin->n,m = matin->m,i,shift = A->indexshift;
848 
849   PetscFunctionBegin;
850   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
851   diag = A->diag;
852 
853   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
854     if (flag & SOR_ZERO_INITIAL_GUESS) {
855       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,1,xx);CHKERRQ(ierr);
856       its--;
857     }
858 
859     while (its--) {
860       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
861       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
862 
863       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
864       if (xx != bb) {
865         ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
866       } else {
867         b = x;
868       }
869       ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
870 
871       xs = x + shift;
872       ls = ls + shift; /* shift by one for index start of 1 */
873 
874       /* go down through the rows */
875       for (i=0; i<m; i++) {
876         n    = A->i[i+1] - A->i[i];
877 	PetscLogFlops(4*n+3);
878         idx  = A->j + A->i[i] + shift;
879         v    = A->a + A->i[i] + shift;
880         sum  = b[i];
881         SPARSEDENSEMDOT(sum,xs,v,idx,n);
882         d    = fshift + A->a[diag[i]+shift];
883         n    = B->i[i+1] - B->i[i];
884         idx  = B->j + B->i[i] + shift;
885         v    = B->a + B->i[i] + shift;
886         SPARSEDENSEMDOT(sum,ls,v,idx,n);
887         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
888       }
889       /* come up through the rows */
890       for (i=m-1; i>-1; i--) {
891         n    = A->i[i+1] - A->i[i];
892 	PetscLogFlops(4*n+3);
893         idx  = A->j + A->i[i] + shift;
894         v    = A->a + A->i[i] + shift;
895         sum  = b[i];
896         SPARSEDENSEMDOT(sum,xs,v,idx,n);
897         d    = fshift + A->a[diag[i]+shift];
898         n    = B->i[i+1] - B->i[i];
899         idx  = B->j + B->i[i] + shift;
900         v    = B->a + B->i[i] + shift;
901         SPARSEDENSEMDOT(sum,ls,v,idx,n);
902         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
903       }
904 
905       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
906       if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
907       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
908     }
909   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
910     if (flag & SOR_ZERO_INITIAL_GUESS) {
911       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
912       its--;
913     }
914     while (its--) {
915       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
916       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
917       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
918       if (xx != bb) {
919         ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
920       } else {
921         b = x;
922       }
923       ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
924       xs = x + shift; /* shift by one for index start of 1 */
925       ls = ls + shift;
926 
927       for (i=0; i<m; i++) {
928         n    = A->i[i+1] - A->i[i];
929 	PetscLogFlops(4*n+3);
930         idx  = A->j + A->i[i] + shift;
931         v    = A->a + A->i[i] + shift;
932         sum  = b[i];
933         SPARSEDENSEMDOT(sum,xs,v,idx,n);
934         d    = fshift + A->a[diag[i]+shift];
935         n    = B->i[i+1] - B->i[i];
936         idx  = B->j + B->i[i] + shift;
937         v    = B->a + B->i[i] + shift;
938         SPARSEDENSEMDOT(sum,ls,v,idx,n);
939         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
940       }
941       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
942       if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
943       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
944     }
945   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
946     if (flag & SOR_ZERO_INITIAL_GUESS) {
947       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
948       its--;
949     }
950     while (its--) {
951       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
952       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
953       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
954       if (xx != bb) {
955         ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
956       } else {
957         b = x;
958       }
959       ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
960       xs = x + shift; /* shift by one for index start of 1 */
961       ls = ls + shift;
962 
963       for (i=m-1; i>-1; i--) {
964         n    = A->i[i+1] - A->i[i];
965 	PetscLogFlops(4*n+3);
966         idx  = A->j + A->i[i] + shift;
967         v    = A->a + A->i[i] + shift;
968         sum  = b[i];
969         SPARSEDENSEMDOT(sum,xs,v,idx,n);
970         d    = fshift + A->a[diag[i]+shift];
971         n    = B->i[i+1] - B->i[i];
972         idx  = B->j + B->i[i] + shift;
973         v    = B->a + B->i[i] + shift;
974         SPARSEDENSEMDOT(sum,ls,v,idx,n);
975         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
976       }
977 
978     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
979     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
980     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
981     }
982   } else {
983     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
984   }
985   PetscFunctionReturn(0);
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "MatGetInfo_MPIAIJ"
990 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
991 {
992   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
993   Mat        A = mat->A,B = mat->B;
994   int        ierr;
995   PetscReal  isend[5],irecv[5];
996 
997   PetscFunctionBegin;
998   info->block_size     = 1.0;
999   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1000   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1001   isend[3] = info->memory;  isend[4] = info->mallocs;
1002   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1003   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1004   isend[3] += info->memory;  isend[4] += info->mallocs;
1005   if (flag == MAT_LOCAL) {
1006     info->nz_used      = isend[0];
1007     info->nz_allocated = isend[1];
1008     info->nz_unneeded  = isend[2];
1009     info->memory       = isend[3];
1010     info->mallocs      = isend[4];
1011   } else if (flag == MAT_GLOBAL_MAX) {
1012     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1013     info->nz_used      = irecv[0];
1014     info->nz_allocated = irecv[1];
1015     info->nz_unneeded  = irecv[2];
1016     info->memory       = irecv[3];
1017     info->mallocs      = irecv[4];
1018   } else if (flag == MAT_GLOBAL_SUM) {
1019     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1020     info->nz_used      = irecv[0];
1021     info->nz_allocated = irecv[1];
1022     info->nz_unneeded  = irecv[2];
1023     info->memory       = irecv[3];
1024     info->mallocs      = irecv[4];
1025   }
1026   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1027   info->fill_ratio_needed = 0;
1028   info->factor_mallocs    = 0;
1029   info->rows_global       = (double)matin->M;
1030   info->columns_global    = (double)matin->N;
1031   info->rows_local        = (double)matin->m;
1032   info->columns_local     = (double)matin->N;
1033 
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatSetOption_MPIAIJ"
1039 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1040 {
1041   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1042   int        ierr;
1043 
1044   PetscFunctionBegin;
1045   switch (op) {
1046   case MAT_NO_NEW_NONZERO_LOCATIONS:
1047   case MAT_YES_NEW_NONZERO_LOCATIONS:
1048   case MAT_COLUMNS_UNSORTED:
1049   case MAT_COLUMNS_SORTED:
1050   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1051   case MAT_KEEP_ZEROED_ROWS:
1052   case MAT_NEW_NONZERO_LOCATION_ERR:
1053   case MAT_USE_INODES:
1054   case MAT_DO_NOT_USE_INODES:
1055   case MAT_IGNORE_ZERO_ENTRIES:
1056     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1057     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1058     break;
1059   case MAT_ROW_ORIENTED:
1060     a->roworiented = PETSC_TRUE;
1061     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1062     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1063     break;
1064   case MAT_ROWS_SORTED:
1065   case MAT_ROWS_UNSORTED:
1066   case MAT_YES_NEW_DIAGONALS:
1067   case MAT_USE_SINGLE_PRECISION_SOLVES:
1068     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1069     break;
1070   case MAT_COLUMN_ORIENTED:
1071     a->roworiented = PETSC_FALSE;
1072     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1073     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1074     break;
1075   case MAT_IGNORE_OFF_PROC_ENTRIES:
1076     a->donotstash = PETSC_TRUE;
1077     break;
1078   case MAT_NO_NEW_DIAGONALS:
1079     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1080     break;
1081   default:
1082     SETERRQ(PETSC_ERR_SUP,"unknown option");
1083     break;
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRow_MPIAIJ"
1090 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1091 {
1092   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1093   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1094   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1095   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1096   int          *cmap,*idx_p;
1097 
1098   PetscFunctionBegin;
1099   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1100   mat->getrowactive = PETSC_TRUE;
1101 
1102   if (!mat->rowvalues && (idx || v)) {
1103     /*
1104         allocate enough space to hold information from the longest row.
1105     */
1106     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1107     int     max = 1,tmp;
1108     for (i=0; i<matin->m; i++) {
1109       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1110       if (max < tmp) { max = tmp; }
1111     }
1112     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1113     mat->rowindices = (int*)(mat->rowvalues + max);
1114   }
1115 
1116   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1117   lrow = row - rstart;
1118 
1119   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1120   if (!v)   {pvA = 0; pvB = 0;}
1121   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1122   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1123   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1124   nztot = nzA + nzB;
1125 
1126   cmap  = mat->garray;
1127   if (v  || idx) {
1128     if (nztot) {
1129       /* Sort by increasing column numbers, assuming A and B already sorted */
1130       int imark = -1;
1131       if (v) {
1132         *v = v_p = mat->rowvalues;
1133         for (i=0; i<nzB; i++) {
1134           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1135           else break;
1136         }
1137         imark = i;
1138         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1139         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1140       }
1141       if (idx) {
1142         *idx = idx_p = mat->rowindices;
1143         if (imark > -1) {
1144           for (i=0; i<imark; i++) {
1145             idx_p[i] = cmap[cworkB[i]];
1146           }
1147         } else {
1148           for (i=0; i<nzB; i++) {
1149             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1150             else break;
1151           }
1152           imark = i;
1153         }
1154         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1155         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1156       }
1157     } else {
1158       if (idx) *idx = 0;
1159       if (v)   *v   = 0;
1160     }
1161   }
1162   *nz = nztot;
1163   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1164   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
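
/*
   A minimal usage sketch (illustrative only): MatGetRow() may only be called
   for rows owned by this process, and each call must be paired with
   MatRestoreRow() before the next row is requested.

     int          ierr,row,rstart,rend,ncols,*cols;
     PetscScalar  *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/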
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1170 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1171 {
1172   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1173 
1174   PetscFunctionBegin;
1175   if (aij->getrowactive == PETSC_FALSE) {
1176     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1177   }
1178   aij->getrowactive = PETSC_FALSE;
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatNorm_MPIAIJ"
1184 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1185 {
1186   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1187   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1188   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1189   PetscReal    sum = 0.0;
1190   PetscScalar  *v;
1191 
1192   PetscFunctionBegin;
1193   if (aij->size == 1) {
1194     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1195   } else {
1196     if (type == NORM_FROBENIUS) {
1197       v = amat->a;
1198       for (i=0; i<amat->nz; i++) {
1199 #if defined(PETSC_USE_COMPLEX)
1200         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1201 #else
1202         sum += (*v)*(*v); v++;
1203 #endif
1204       }
1205       v = bmat->a;
1206       for (i=0; i<bmat->nz; i++) {
1207 #if defined(PETSC_USE_COMPLEX)
1208         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1209 #else
1210         sum += (*v)*(*v); v++;
1211 #endif
1212       }
1213       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1214       *norm = sqrt(*norm);
1215     } else if (type == NORM_1) { /* max column norm */
1216       PetscReal *tmp,*tmp2;
1217       int    *jj,*garray = aij->garray;
1218       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1219       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1220       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1221       *norm = 0.0;
1222       v = amat->a; jj = amat->j;
1223       for (j=0; j<amat->nz; j++) {
1224         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1225       }
1226       v = bmat->a; jj = bmat->j;
1227       for (j=0; j<bmat->nz; j++) {
1228         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1229       }
1230       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1231       for (j=0; j<mat->N; j++) {
1232         if (tmp2[j] > *norm) *norm = tmp2[j];
1233       }
1234       ierr = PetscFree(tmp);CHKERRQ(ierr);
1235       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1236     } else if (type == NORM_INFINITY) { /* max row norm */
1237       PetscReal ntemp = 0.0;
1238       for (j=0; j<aij->A->m; j++) {
1239         v = amat->a + amat->i[j] + shift;
1240         sum = 0.0;
1241         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1242           sum += PetscAbsScalar(*v); v++;
1243         }
1244         v = bmat->a + bmat->i[j] + shift;
1245         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1246           sum += PetscAbsScalar(*v); v++;
1247         }
1248         if (sum > ntemp) ntemp = sum;
1249       }
1250       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1251     } else {
1252       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
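
/*
   For reference, the norms computed above (a summary, not additional code):

       NORM_FROBENIUS:  ||A||_F   = sqrt(sum_{i,j} |a_ij|^2)   local sums, reduced with MPI_SUM
       NORM_1:          ||A||_1   = max_j sum_i |a_ij|         column sums reduced with MPI_SUM, then the max taken
       NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|         local row sums, reduced with MPI_MAX
*/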
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatTranspose_MPIAIJ"
1260 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1261 {
1262   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1263   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1264   int          ierr,shift = Aloc->indexshift;
1265   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1266   Mat          B;
1267   PetscScalar  *array;
1268 
1269   PetscFunctionBegin;
1270   if (!matout && M != N) {
1271     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1272   }
1273 
1274   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1275 
1276   /* copy over the A part */
1277   Aloc = (Mat_SeqAIJ*)a->A->data;
1278   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1279   row = a->rstart;
1280   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1281   for (i=0; i<m; i++) {
1282     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1283     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1284   }
1285   aj = Aloc->j;
1286   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1287 
1288   /* copy over the B part */
1289   Aloc = (Mat_SeqAIJ*)a->B->data;
1290   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1291   row  = a->rstart;
1292   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1293   ct   = cols;
1294   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1295   for (i=0; i<m; i++) {
1296     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1297     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1298   }
1299   ierr = PetscFree(ct);CHKERRQ(ierr);
1300   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1301   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1302   if (matout) {
1303     *matout = B;
1304   } else {
1305     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1312 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1313 {
1314   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat        a = aij->A,b = aij->B;
1316   int        ierr,s1,s2,s3;
1317 
1318   PetscFunctionBegin;
1319   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1320   if (rr) {
1321     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1322     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1323     /* Overlap communication with computation. */
1324     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1325   }
1326   if (ll) {
1327     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1328     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1329     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1330   }
1331   /* scale  the diagonal block */
1332   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1333 
1334   if (rr) {
1335     /* Do a scatter end and then right scale the off-diagonal block */
1336     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1337     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1338   }
1339 
1340   PetscFunctionReturn(0);
1341 }
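
/*
   Summary of the scaling above (illustrative): with the local row block split
   as [A | B], the left vector ll scales rows using its locally owned part,
   while the right vector rr scales the columns of A with its locally owned
   part and the columns of B with its ghost values gathered into lvec:

       [A | B]  <-  [ diag(ll_local) A diag(rr_local) | diag(ll_local) B diag(rr_ghost) ]

   and the gather of rr_ghost is overlapped with the other scaling work.
*/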
1342 
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1346 int MatPrintHelp_MPIAIJ(Mat A)
1347 {
1348   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1349   int        ierr;
1350 
1351   PetscFunctionBegin;
1352   if (!a->rank) {
1353     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1360 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1361 {
1362   PetscFunctionBegin;
1363   *bs = 1;
1364   PetscFunctionReturn(0);
1365 }
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1368 int MatSetUnfactored_MPIAIJ(Mat A)
1369 {
1370   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1371   int        ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatEqual_MPIAIJ"
1380 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1381 {
1382   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1383   Mat        a,b,c,d;
1384   PetscTruth flg;
1385   int        ierr;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1389   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1390   a = matA->A; b = matA->B;
1391   c = matB->A; d = matB->B;
1392 
1393   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1394   if (flg == PETSC_TRUE) {
1395     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1396   }
1397   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatCopy_MPIAIJ"
1403 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1404 {
1405   int        ierr;
1406   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1407   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1408   PetscTruth flg;
1409 
1410   PetscFunctionBegin;
1411   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1412   if (str != SAME_NONZERO_PATTERN || !flg) {
1413     /* Because of the column compression in the off-processor part of the matrix a->B,
1414        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1415        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1416        could be provided by first uncompressing the a->B matrices and then copying
1417        the submatrices */
1418     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1419   } else {
1420     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1421     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1428 int MatSetUpPreallocation_MPIAIJ(Mat A)
1429 {
1430   int        ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1438 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1439 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1440 EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1441 EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1442 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1443 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1444 #endif
1445 
1446 /* -------------------------------------------------------------------*/
1447 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1448        MatGetRow_MPIAIJ,
1449        MatRestoreRow_MPIAIJ,
1450        MatMult_MPIAIJ,
1451        MatMultAdd_MPIAIJ,
1452        MatMultTranspose_MPIAIJ,
1453        MatMultTransposeAdd_MPIAIJ,
1454        0,
1455        0,
1456        0,
1457        0,
1458        0,
1459        0,
1460        MatRelax_MPIAIJ,
1461        MatTranspose_MPIAIJ,
1462        MatGetInfo_MPIAIJ,
1463        MatEqual_MPIAIJ,
1464        MatGetDiagonal_MPIAIJ,
1465        MatDiagonalScale_MPIAIJ,
1466        MatNorm_MPIAIJ,
1467        MatAssemblyBegin_MPIAIJ,
1468        MatAssemblyEnd_MPIAIJ,
1469        0,
1470        MatSetOption_MPIAIJ,
1471        MatZeroEntries_MPIAIJ,
1472        MatZeroRows_MPIAIJ,
1473 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1474        MatLUFactorSymbolic_MPIAIJ_TFS,
1475 #else
1476        0,
1477 #endif
1478        0,
1479        0,
1480        0,
1481        MatSetUpPreallocation_MPIAIJ,
1482        0,
1483        0,
1484        0,
1485        0,
1486        MatDuplicate_MPIAIJ,
1487        0,
1488        0,
1489        0,
1490        0,
1491        0,
1492        MatGetSubMatrices_MPIAIJ,
1493        MatIncreaseOverlap_MPIAIJ,
1494        MatGetValues_MPIAIJ,
1495        MatCopy_MPIAIJ,
1496        MatPrintHelp_MPIAIJ,
1497        MatScale_MPIAIJ,
1498        0,
1499        0,
1500        0,
1501        MatGetBlockSize_MPIAIJ,
1502        0,
1503        0,
1504        0,
1505        0,
1506        MatFDColoringCreate_MPIAIJ,
1507        0,
1508        MatSetUnfactored_MPIAIJ,
1509        0,
1510        0,
1511        MatGetSubMatrix_MPIAIJ,
1512        MatDestroy_MPIAIJ,
1513        MatView_MPIAIJ,
1514        MatGetPetscMaps_Petsc,
1515        0,
1516        0,
1517        0,
1518        0,
1519        0,
1520        0,
1521        0,
1522        0,
1523        MatSetColoring_MPIAIJ,
1524        MatSetValuesAdic_MPIAIJ,
1525        MatSetValuesAdifor_MPIAIJ
1526 };
1527 
1528 /* ----------------------------------------------------------------------------------------*/
1529 
1530 EXTERN_C_BEGIN
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1533 int MatStoreValues_MPIAIJ(Mat mat)
1534 {
1535   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1536   int        ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1540   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 EXTERN_C_END
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1548 int MatRetrieveValues_MPIAIJ(Mat mat)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1551   int        ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1555   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 EXTERN_C_END
1559 
1560 #include "petscpc.h"
1561 EXTERN_C_BEGIN
1562 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1563 EXTERN_C_END
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatCreate_MPIAIJ"
1568 int MatCreate_MPIAIJ(Mat B)
1569 {
1570   Mat_MPIAIJ   *b;
1571   int          ierr,i,size;
1572 
1573   PetscFunctionBegin;
1574   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1575 
1576   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1577   B->data         = (void*)b;
1578   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1579   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1580   B->factor       = 0;
1581   B->assembled    = PETSC_FALSE;
1582   B->mapping      = 0;
1583 
1584   B->insertmode      = NOT_SET_VALUES;
1585   b->size            = size;
1586   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1587 
1588   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1589   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1590 
1591   /* the information in the maps duplicates the information computed below; eventually
1592      we should remove the duplicate information that is not contained in the maps */
1593   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1594   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1595 
1596   /* build local table of row and column ownerships */
1597   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1598   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1599   b->cowners = b->rowners + b->size + 2;
1600   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1601   b->rowners[0] = 0;
1602   for (i=2; i<=b->size; i++) {
1603     b->rowners[i] += b->rowners[i-1];
1604   }
1605   b->rstart = b->rowners[b->rank];
1606   b->rend   = b->rowners[b->rank+1];
1607   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1608   b->cowners[0] = 0;
1609   for (i=2; i<=b->size; i++) {
1610     b->cowners[i] += b->cowners[i-1];
1611   }
1612   b->cstart = b->cowners[b->rank];
1613   b->cend   = b->cowners[b->rank+1];
1614 
1615   /* build stash (cache) for matrix entries destined for other processes */
1616   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1617   b->donotstash  = PETSC_FALSE;
1618   b->colmap      = 0;
1619   b->garray      = 0;
1620   b->roworiented = PETSC_TRUE;
1621 
1622   /* stuff used for matrix vector multiply */
1623   b->lvec      = PETSC_NULL;
1624   b->Mvctx     = PETSC_NULL;
1625 
1626   /* stuff for MatGetRow() */
1627   b->rowindices   = 0;
1628   b->rowvalues    = 0;
1629   b->getrowactive = PETSC_FALSE;
1630 
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1632                                      "MatStoreValues_MPIAIJ",
1633                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1635                                      "MatRetrieveValues_MPIAIJ",
1636                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1638                                      "MatGetDiagonalBlock_MPIAIJ",
1639                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1640   PetscFunctionReturn(0);
1641 }
1642 EXTERN_C_END
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1646 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1647 {
1648   Mat        mat;
1649   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1650   int        ierr;
1651 
1652   PetscFunctionBegin;
1653   *newmat       = 0;
1654   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1655   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1656   a    = (Mat_MPIAIJ*)mat->data;
1657   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1658   mat->factor       = matin->factor;
1659   mat->assembled    = PETSC_TRUE;
1660   mat->insertmode   = NOT_SET_VALUES;
1661   mat->preallocated = PETSC_TRUE;
1662 
1663   a->rstart       = oldmat->rstart;
1664   a->rend         = oldmat->rend;
1665   a->cstart       = oldmat->cstart;
1666   a->cend         = oldmat->cend;
1667   a->size         = oldmat->size;
1668   a->rank         = oldmat->rank;
1669   a->donotstash   = oldmat->donotstash;
1670   a->roworiented  = oldmat->roworiented;
1671   a->rowindices   = 0;
1672   a->rowvalues    = 0;
1673   a->getrowactive = PETSC_FALSE;
1674 
1675   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1676   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1677   if (oldmat->colmap) {
1678 #if defined (PETSC_USE_CTABLE)
1679     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1680 #else
1681     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1682     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1683     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1684 #endif
1685   } else a->colmap = 0;
1686   if (oldmat->garray) {
1687     int len;
1688     len  = oldmat->B->n;
1689     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1690     PetscLogObjectMemory(mat,len*sizeof(int));
1691     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1692   } else a->garray = 0;
1693 
1694   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1695   PetscLogObjectParent(mat,a->lvec);
1696   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1697   PetscLogObjectParent(mat,a->Mvctx);
1698   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1699   PetscLogObjectParent(mat,a->A);
1700   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1701   PetscLogObjectParent(mat,a->B);
1702   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1703   *newmat = mat;
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #include "petscsys.h"
1708 
1709 EXTERN_C_BEGIN
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatLoad_MPIAIJ"
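/*
    Loads a matrix stored in PETSc binary format into an MPIAIJ matrix.  Process 0
  reads the file header, the row lengths, the column indices, and the numerical
  values, keeps its own share of the rows, and ships every other process its share;
  the rows are divided as evenly as possible, with the first M%size processes
  receiving one extra row.
*/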
1712 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1713 {
1714   Mat          A;
1715   PetscScalar  *vals,*svals;
1716   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1717   MPI_Status   status;
1718   int          i,nz,ierr,j,rstart,rend,fd;
1719   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1720   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1721   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1722 
1723   PetscFunctionBegin;
1724   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1725   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1726   if (!rank) {
1727     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1728     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1729     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1730     if (header[3] < 0) {
1731       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1732     }
1733   }
1734 
1735   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1736   M = header[1]; N = header[2];
1737   /* determine ownership of all rows */
1738   m = M/size + ((M % size) > rank);
1739   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1740   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1741   rowners[0] = 0;
1742   for (i=2; i<=size; i++) {
1743     rowners[i] += rowners[i-1];
1744   }
1745   rstart = rowners[rank];
1746   rend   = rowners[rank+1];
1747 
1748   /* distribute row lengths to all processors */
1749   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1750   offlens = ourlens + (rend-rstart);
1751   if (!rank) {
1752     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1753     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1754     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1755     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1756     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1757     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1758   } else {
1759     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1760   }
1761 
1762   if (!rank) {
1763     /* calculate the number of nonzeros on each processor */
1764     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1765     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1766     for (i=0; i<size; i++) {
1767       for (j=rowners[i]; j< rowners[i+1]; j++) {
1768         procsnz[i] += rowlengths[j];
1769       }
1770     }
1771     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1772 
1773     /* determine max buffer needed and allocate it */
1774     maxnz = 0;
1775     for (i=0; i<size; i++) {
1776       maxnz = PetscMax(maxnz,procsnz[i]);
1777     }
1778     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1779 
1780     /* read in my part of the matrix column indices  */
1781     nz   = procsnz[0];
1782     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1783     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1784 
1785     /* read in everyone else's and ship it off */
1786     for (i=1; i<size; i++) {
1787       nz   = procsnz[i];
1788       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1789       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1790     }
1791     ierr = PetscFree(cols);CHKERRQ(ierr);
1792   } else {
1793     /* determine buffer space needed for message */
1794     nz = 0;
1795     for (i=0; i<m; i++) {
1796       nz += ourlens[i];
1797     }
1798     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1799 
1800     /* receive message of column indices */
1801     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1802     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1803     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1804   }
1805 
1806   /* determine column ownership if matrix is not square */
1807   if (N != M) {
1808     n      = N/size + ((N % size) > rank);
1809     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1810     cstart = cend - n;
1811   } else {
1812     cstart = rstart;
1813     cend   = rend;
1814     n      = cend - cstart;
1815   }
1816 
1817   /* loop over local rows, determining number of off-diagonal entries */
1818   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1819   jj = 0;
1820   for (i=0; i<m; i++) {
1821     for (j=0; j<ourlens[i]; j++) {
1822       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1823       jj++;
1824     }
1825   }
1826 
1827   /* create our matrix */
1828   for (i=0; i<m; i++) {
1829     ourlens[i] -= offlens[i];
1830   }
1831   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1832   A = *newmat;
1833   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1834   for (i=0; i<m; i++) {
1835     ourlens[i] += offlens[i];
1836   }
1837 
1838   if (!rank) {
1839     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1840 
1841     /* read in my part of the matrix numerical values  */
1842     nz   = procsnz[0];
1843     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1844 
1845     /* insert into matrix */
1846     jj      = rstart;
1847     smycols = mycols;
1848     svals   = vals;
1849     for (i=0; i<m; i++) {
1850       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1851       smycols += ourlens[i];
1852       svals   += ourlens[i];
1853       jj++;
1854     }
1855 
1856     /* read in other processors and ship out */
1857     for (i=1; i<size; i++) {
1858       nz   = procsnz[i];
1859       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1860       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1861     }
1862     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1863   } else {
1864     /* receive numeric values */
1865     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1866 
1867     /* receive message of values */
1868     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1869     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1870     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1871 
1872     /* insert into matrix */
1873     jj      = rstart;
1874     smycols = mycols;
1875     svals   = vals;
1876     for (i=0; i<m; i++) {
1877       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1878       smycols += ourlens[i];
1879       svals   += ourlens[i];
1880       jj++;
1881     }
1882   }
1883   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1884   ierr = PetscFree(vals);CHKERRQ(ierr);
1885   ierr = PetscFree(mycols);CHKERRQ(ierr);
1886   ierr = PetscFree(rowners);CHKERRQ(ierr);
1887 
1888   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1889   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 EXTERN_C_END
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1896 /*
1897     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1898   copy, and then the end result built by concatenating the local matrices.
1899   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1900 */
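/*
    A sketch of the intended calling pattern (normally reached through MatGetSubMatrix(),
  whose argument list mirrors the one below; isrow, iscol, and csize are illustrative):

    ierr = MatGetSubMatrix(mat,isrow,iscol,csize,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
    ... change the numerical values of mat, keeping the same nonzero pattern ...
    ierr = MatGetSubMatrix(mat,isrow,iscol,csize,MAT_REUSE_MATRIX,&newmat);CHKERRQ(ierr);
*/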
1901 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1902 {
1903   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1904   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1905   Mat          *local,M,Mreuse;
1906   PetscScalar  *vwork,*aa;
1907   MPI_Comm     comm = mat->comm;
1908   Mat_SeqAIJ   *aij;
1909 
1910 
1911   PetscFunctionBegin;
1912   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1914 
1915   if (call ==  MAT_REUSE_MATRIX) {
1916     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1917     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1918     local = &Mreuse;
1919     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1920   } else {
1921     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1922     Mreuse = *local;
1923     ierr   = PetscFree(local);CHKERRQ(ierr);
1924   }
1925 
1926   /*
1927       m - number of local rows
1928       n - number of columns (same on all processors)
1929       rstart - first row in new global matrix generated
1930   */
1931   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1932   if (call == MAT_INITIAL_MATRIX) {
1933     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1934     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1935     ii  = aij->i;
1936     jj  = aij->j;
1937 
1938     /*
1939         Determine the number of non-zeros in the diagonal and off-diagonal
1940         portions of the matrix in order to do correct preallocation
1941     */
1942 
1943     /* first get start and end of "diagonal" columns */
1944     if (csize == PETSC_DECIDE) {
1945       nlocal = n/size + ((n % size) > rank);
1946     } else {
1947       nlocal = csize;
1948     }
1949     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1950     rstart = rend - nlocal;
1951     if (rank == size - 1 && rend != n) {
1952       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1953     }
1954 
1955     /* next, compute all the lengths */
1956     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1957     olens = dlens + m;
1958     for (i=0; i<m; i++) {
1959       jend = ii[i+1] - ii[i];
1960       olen = 0;
1961       dlen = 0;
1962       for (j=0; j<jend; j++) {
1963         if (*jj < rstart || *jj >= rend) olen++;
1964         else dlen++;
1965         jj++;
1966       }
1967       olens[i] = olen;
1968       dlens[i] = dlen;
1969     }
1970     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1971     ierr = PetscFree(dlens);CHKERRQ(ierr);
1972   } else {
1973     int ml,nl;
1974 
1975     M = *newmat;
1976     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1977     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1978     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1979     /*
1980          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1981        rather than the slower MatSetValues().
1982     */
1983     M->was_assembled = PETSC_TRUE;
1984     M->assembled     = PETSC_FALSE;
1985   }
1986   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1987   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1988   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1989   ii  = aij->i;
1990   jj  = aij->j;
1991   aa  = aij->a;
1992   for (i=0; i<m; i++) {
1993     row   = rstart + i;
1994     nz    = ii[i+1] - ii[i];
1995     cwork = jj;     jj += nz;
1996     vwork = aa;     aa += nz;
1997     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1998   }
1999 
2000   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2001   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2002   *newmat = M;
2003 
2004   /* save the submatrix used on this processor for the next request */
2005   if (call ==  MAT_INITIAL_MATRIX) {
2006     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2007     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2008   }
2009 
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2015 /*@C
2016    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2017    (the default parallel PETSc format).  For good matrix assembly performance
2018    the user should preallocate the matrix storage by setting the parameters
2019    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2020    performance can be increased by more than a factor of 50.
2021 
2022    Collective on MPI_Comm
2023 
2024    Input Parameters:
2025 +  A - the matrix
2026 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2027            (same value is used for all local rows)
2028 .  d_nnz - array containing the number of nonzeros in the various rows of the
2029            DIAGONAL portion of the local submatrix (possibly different for each row)
2030            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2031            The size of this array is equal to the number of local rows, i.e. 'm'.
2032            You must leave room for the diagonal entry even if it is zero.
2033 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2034            submatrix (same value is used for all local rows).
2035 -  o_nnz - array containing the number of nonzeros in the various rows of the
2036            OFF-DIAGONAL portion of the local submatrix (possibly different for
2037            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2038            structure. The size of this array is equal to the number
2039            of local rows, i.e. 'm'.
2040 
2041    The AIJ format (also called the Yale sparse matrix format or
2042    compressed row storage), is fully compatible with standard Fortran 77
2043    storage.  That is, the stored row and column indices can begin at
2044    either one (as in Fortran) or zero.  See the users manual for details.
2045 
2046    The user MUST specify either the local or global matrix dimensions
2047    (possibly both).
2048 
2049    The parallel matrix is partitioned such that the first m0 rows belong to
2050    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2051    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2052 
2053    The DIAGONAL portion of the local submatrix of a processor can be defined
2054    as the submatrix which is obtained by extracting the part corresponding
2055    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2056    first row that belongs to the processor, and r2 is the last row belonging
2057    to this processor. This is a square mxm matrix. The remaining portion
2058    of the m x N local submatrix constitutes the OFF-DIAGONAL portion.
2059 
2060    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2061 
2062    By default, this format uses inodes (identical nodes) when possible.
2063    We search for consecutive rows with the same nonzero structure, thereby
2064    reusing matrix information to achieve increased efficiency.
2065 
2066    Options Database Keys:
2067 +  -mat_aij_no_inode  - Do not use inodes
2068 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2069 -  -mat_aij_oneindex - Internally use indexing starting at 1
2070         rather than 0.  Note that when calling MatSetValues(),
2071         the user still MUST index entries starting at 0!
2072 
2073    Example usage:
2074 
2075    Consider the following 8x8 matrix with 34 non-zero values, that is
2076    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2077    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2078    as follows:
2079 
2080 .vb
2081             1  2  0  |  0  3  0  |  0  4
2082     Proc0   0  5  6  |  7  0  0  |  8  0
2083             9  0 10  | 11  0  0  | 12  0
2084     -------------------------------------
2085            13  0 14  | 15 16 17  |  0  0
2086     Proc1   0 18  0  | 19 20 21  |  0  0
2087             0  0  0  | 22 23  0  | 24  0
2088     -------------------------------------
2089     Proc2  25 26 27  |  0  0 28  | 29  0
2090            30  0  0  | 31 32 33  |  0 34
2091 .ve
2092 
2093    This can be represented as a collection of submatrices as:
2094 
2095 .vb
2096       A B C
2097       D E F
2098       G H I
2099 .ve
2100 
2101    Where the submatrices A,B,C are owned by proc0, D,E,F are
2102    owned by proc1, G,H,I are owned by proc2.
2103 
2104    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2105    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2106    The 'M','N' parameters are 8,8, and have the same values on all procs.
2107 
2108    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2109    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2110    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2111    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2112    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2113    matrix, and [DF] as another SeqAIJ matrix.
2114 
2115    When d_nz, o_nz parameters are specified, d_nz storage elements are
2116    allocated for every row of the local diagonal submatrix, and o_nz
2117    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2118    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2119    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2120    In this case, the values of d_nz,o_nz are:
2121 .vb
2122      proc0 : d_nz = 2, o_nz = 2
2123      proc1 : d_nz = 3, o_nz = 2
2124      proc2 : d_nz = 1, o_nz = 4
2125 .ve
2126    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2127    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2128    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2129    34 values.
2130 
2131    When d_nnz, o_nnz parameters are specified, the storage is specified
2132    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2133    In the above case the values for d_nnz,o_nnz are:
2134 .vb
2135      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2136      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2137      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2138 .ve
2139    Here the space allocated is the sum of all the above values, i.e. 34, and
2140    hence the preallocation is perfect.
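
   A minimal calling sequence for this example (a sketch; the sizes m,n,M,N and the
   arrays d_nnz,o_nnz are assumed to be set by the caller as described above):
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve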
2141 
2142    Level: intermediate
2143 
2144 .keywords: matrix, aij, compressed row, sparse, parallel
2145 
2146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2147 @*/
2148 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2149 {
2150   Mat_MPIAIJ   *b;
2151   int          ierr,i;
2152   PetscTruth   flg2;
2153 
2154   PetscFunctionBegin;
2155   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2156   if (!flg2) PetscFunctionReturn(0);
2157   B->preallocated = PETSC_TRUE;
2158   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2159   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2160   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2161   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2162   if (d_nnz) {
2163     for (i=0; i<B->m; i++) {
2164       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2165     }
2166   }
2167   if (o_nnz) {
2168     for (i=0; i<B->m; i++) {
2169       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2170     }
2171   }
2172   b = (Mat_MPIAIJ*)B->data;
2173 
2174   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2175   PetscLogObjectParent(B,b->A);
2176   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2177   PetscLogObjectParent(B,b->B);
2178 
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatCreateMPIAIJ"
2184 /*@C
2185    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2186    (the default parallel PETSc format).  For good matrix assembly performance
2187    the user should preallocate the matrix storage by setting the parameters
2188    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2189    performance can be increased by more than a factor of 50.
2190 
2191    Collective on MPI_Comm
2192 
2193    Input Parameters:
2194 +  comm - MPI communicator
2195 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2196            This value should be the same as the local size used in creating the
2197            y vector for the matrix-vector product y = Ax.
2198 .  n - This value should be the same as the local size used in creating the
2199        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2200        calculated if N is given). For square matrices n is almost always m.
2201 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2202 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2203 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2204            (same value is used for all local rows)
2205 .  d_nnz - array containing the number of nonzeros in the various rows of the
2206            DIAGONAL portion of the local submatrix (possibly different for each row)
2207            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2208            The size of this array is equal to the number of local rows, i.e. 'm'.
2209            You must leave room for the diagonal entry even if it is zero.
2210 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2211            submatrix (same value is used for all local rows).
2212 -  o_nnz - array containing the number of nonzeros in the various rows of the
2213            OFF-DIAGONAL portion of the local submatrix (possibly different for
2214            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2215            structure. The size of this array is equal to the number
2216            of local rows, i.e. 'm'.
2217 
2218    Output Parameter:
2219 .  A - the matrix
2220 
2221    Notes:
2222    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2223    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2224    storage requirements for this matrix.
2225 
2226    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2227    processor then it must be used on all processors that share the object for
2228    that argument.
2229 
2230    The AIJ format (also called the Yale sparse matrix format or
2231    compressed row storage), is fully compatible with standard Fortran 77
2232    storage.  That is, the stored row and column indices can begin at
2233    either one (as in Fortran) or zero.  See the users manual for details.
2234 
2235    The user MUST specify either the local or global matrix dimensions
2236    (possibly both).
2237 
2238    The parallel matrix is partitioned such that the first m0 rows belong to
2239    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2240    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2241 
2242    The DIAGONAL portion of the local submatrix of a processor can be defined
2243    as the submatrix which is obtained by extracting the part corresponding
2244    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2245    first row that belongs to the processor, and r2 is the last row belonging
2246    to this processor. This is a square mxm matrix. The remaining portion
2247    of the m x N local submatrix constitutes the OFF-DIAGONAL portion.
2248 
2249    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2250 
2251    By default, this format uses inodes (identical nodes) when possible.
2252    We search for consecutive rows with the same nonzero structure, thereby
2253    reusing matrix information to achieve increased efficiency.
2254 
2255    Options Database Keys:
2256 +  -mat_aij_no_inode  - Do not use inodes
2257 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2258 -  -mat_aij_oneindex - Internally use indexing starting at 1
2259         rather than 0.  Note that when calling MatSetValues(),
2260         the user still MUST index entries starting at 0!
2261 
2262 
2263    Example usage:
2264 
2265    Consider the following 8x8 matrix with 34 non-zero values, that is
2266    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2267    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2268    as follows:
2269 
2270 .vb
2271             1  2  0  |  0  3  0  |  0  4
2272     Proc0   0  5  6  |  7  0  0  |  8  0
2273             9  0 10  | 11  0  0  | 12  0
2274     -------------------------------------
2275            13  0 14  | 15 16 17  |  0  0
2276     Proc1   0 18  0  | 19 20 21  |  0  0
2277             0  0  0  | 22 23  0  | 24  0
2278     -------------------------------------
2279     Proc2  25 26 27  |  0  0 28  | 29  0
2280            30  0  0  | 31 32 33  |  0 34
2281 .ve
2282 
2283    This can be represented as a collection of submatrices as:
2284 
2285 .vb
2286       A B C
2287       D E F
2288       G H I
2289 .ve
2290 
2291    Where the submatrices A,B,C are owned by proc0, D,E,F are
2292    owned by proc1, G,H,I are owned by proc2.
2293 
2294    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2295    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2296    The 'M','N' parameters are 8,8, and have the same values on all procs.
2297 
2298    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2299    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2300    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2301    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2302    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2303    matrix, and [DF] as another SeqAIJ matrix.
2304 
2305    When d_nz, o_nz parameters are specified, d_nz storage elements are
2306    allocated for every row of the local diagonal submatrix, and o_nz
2307    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2308    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2309    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2310    In this case, the values of d_nz,o_nz are:
2311 .vb
2312      proc0 : d_nz = 2, o_nz = 2
2313      proc1 : d_nz = 3, o_nz = 2
2314      proc2 : d_nz = 1, o_nz = 4
2315 .ve
2316    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2317    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2318    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2319    34 values.
2320 
2321    When d_nnz, o_nnz parameters are specified, the storage is specified
2322    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2323    In the above case the values for d_nnz,o_nnz are:
2324 .vb
2325      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2326      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2327      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2328 .ve
2329    Here the space allocated is the sum of all the above values, i.e. 34, and
2330    hence the preallocation is perfect.
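
   A minimal calling sequence (a sketch; the sizes m,n,M,N and the per-row arrays
   d_nnz,o_nnz are assumed to be set by the caller as described above):
.vb
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* insert entries with MatSetValues(), then assemble */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve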
2331 
2332    Level: intermediate
2333 
2334 .keywords: matrix, aij, compressed row, sparse, parallel
2335 
2336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2337 @*/
2338 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2339 {
2340   int ierr,size;
2341 
2342   PetscFunctionBegin;
2343   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2344   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2345   if (size > 1) {
2346     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2347     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2348   } else {
2349     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2350     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2351   }
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
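/*
    Returns the "diagonal" (Ad) and "off-diagonal" (Ao) SeqAIJ blocks stored on this
  process, along with garray, which maps the compressed local column numbers of the
  off-diagonal block to global column numbers.
*/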
2357 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2358 {
2359   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2360   PetscFunctionBegin;
2361   *Ad     = a->A;
2362   *Ao     = a->B;
2363   *colmap = a->garray;
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2369 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2370 {
2371   int        ierr;
2372   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2373 
2374   PetscFunctionBegin;
2375   if (coloring->ctype == IS_COLORING_LOCAL) {
2376     int        *allcolors,*colors,i;
2377     ISColoring ocoloring;
2378 
2379     /* set coloring for diagonal portion */
2380     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2381 
2382     /* set coloring for off-diagonal portion */
2383     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2384     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2385     for (i=0; i<a->B->n; i++) {
2386       colors[i] = allcolors[a->garray[i]];
2387     }
2388     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2389     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2390     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2391     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2392   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2393     int        *colors,i,*larray;
2394     ISColoring ocoloring;
2395 
2396     /* set coloring for diagonal portion */
2397     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2398     for (i=0; i<a->A->n; i++) {
2399       larray[i] = i + a->cstart;
2400     }
2401     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2402     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2403     for (i=0; i<a->A->n; i++) {
2404       colors[i] = coloring->colors[larray[i]];
2405     }
2406     ierr = PetscFree(larray);CHKERRQ(ierr);
2407     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2408     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2409     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2410 
2411     /* set coloring for off-diagonal portion */
2412     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2413     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2414     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2415     for (i=0; i<a->B->n; i++) {
2416       colors[i] = coloring->colors[larray[i]];
2417     }
2418     ierr = PetscFree(larray);CHKERRQ(ierr);
2419     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2420     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2421     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2422   } else {
2423     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2424   }
2425 
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2431 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2432 {
2433   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2434   int        ierr;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2438   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2444 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2445 {
2446   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2447   int        ierr;
2448 
2449   PetscFunctionBegin;
2450   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2451   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454