xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a91f3ead952e4901dc6bb3f6125eb3ed6fbc984d)
1 /*$Id: mpiaij.c,v 1.328 2001/03/09 20:10:12 bsmith Exp balay $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it it is not scalable (each processor
19 has an order N integer array) but access is fast.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
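/*
   Note that colmap stores (local index)+1 rather than the local index itself, so that a
   lookup result of 0 (and hence -1 after the caller subtracts 1) can be used to detect
   global columns that are not present in the off-diagonal block.
*/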
42 
43 #define CHUNKSIZE   15
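/*
   The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for a single
   (row,col,value) entry: the "A" variant updates the diagonal block aij->A and the "B"
   variant the off-diagonal block aij->B.  Each does a short binary search followed by a
   linear scan to locate the column within the row; if the row has no free slot left,
   the entire i/j/a storage is reallocated with room for CHUNKSIZE additional entries.
*/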
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         Scalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         Scalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNC__
193 #define __FUNC__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
197   Scalar     value;
198   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int        cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth roworiented = aij->roworiented;
201 
202   /* Some variables required by the inlined MatSetValues macros below */
203   Mat        A = aij->A;
204   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
205   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   Scalar     *aa = a->a;
207   PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat        B = aij->B;
209   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
210   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   Scalar     *ba = b->a;
212 
213   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int        nonew = a->nonew,shift = a->indexshift;
215   Scalar     *ap;
216 
217   PetscFunctionBegin;
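  /*
     Each row is handled in one of three ways: (1) locally owned rows with columns in
     [cstart,cend) go into the diagonal block aij->A; (2) locally owned rows with other
     columns go into the off-diagonal block aij->B, after mapping the global column to a
     local one through colmap; (3) rows owned by another process are stashed and
     communicated during MatAssemblyBegin/End().
  */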
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNC__
279 #define __FUNC__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNC__
322 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNC__
349 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   Scalar      *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
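  /*
     Assembly proceeds in stages: first the stashed off-process values are received and
     inserted, then the diagonal block is assembled; then, if any process has
     disassembled, this process disassembles as well so that all can reassemble
     consistently; the scatter context for the off-diagonal block is built on the first
     final assembly, and finally the off-diagonal block is assembled.
  */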
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled, if so we must
381      also disassemble ourselves, so that we may reassemble. */
382   /*
383      if nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled thus we can skip this stuff
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
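    /*
       was_assembled is 0 or 1 on each process, so the MPI_PROD reduction is 1 only if
       every process was previously assembled; a result of 0 means some process has
       disassembled and the rest must follow suit.
    */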
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNC__
407 #define __FUNC__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
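  /*
     The rows listed in is may be owned by any process.  Each process first determines
     the owner of every row it was given, ships each row number to its owning process
     (including itself), and then zeroes the local rows it receives.
  */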
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
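  /*
     After this reduction work[rank] is used below as nmax, the length of the longest
     message this process must receive, and work[size+rank] as nrecvs, the number of
     processes that will send to it.
  */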
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry (which
533      would allocate new memory, triggering many mallocs and slowing things down).
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNC__
572 #define __FUNC__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
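  /*
     y = A_diag*x + A_off*x_ghost: the scatter that gathers the needed ghost values of x
     into a->lvec is overlapped with the multiply by the diagonal block, then the
     off-diagonal block is applied to the gathered values and added to y.
  */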
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNC__
606 #define __FUNC__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for this implementation */
621   /* but may not always be true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNC__
627 #define __FUNC__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in zz until the next line, which is true for this implementation */
642   /* but may not always be true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNC__
652 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNC__
668 #define __FUNC__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(Scalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNC__
681 #define __FUNC__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNC__
709 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
755   } else {
756     /* assemble the entire matrix onto first processor. */
757     Mat         A;
758     Mat_SeqAIJ *Aloc;
759     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
760     Scalar      *a;
761 
762     if (!rank) {
763       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
764     } else {
765       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
766     }
767     PetscLogObjectParent(mat,A);
768 
769     /* copy over the A part */
770     Aloc = (Mat_SeqAIJ*)aij->A->data;
771     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
772     row = aij->rstart;
773     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
774     for (i=0; i<m; i++) {
775       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
776       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
777     }
778     aj = Aloc->j;
779     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
780 
781     /* copy over the B part */
782     Aloc = (Mat_SeqAIJ*)aij->B->data;
783     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
784     row  = aij->rstart;
785     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
786     ct   = cols;
787     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
788     for (i=0; i<m; i++) {
789       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
790       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
791     }
792     ierr = PetscFree(ct);CHKERRQ(ierr);
793     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
794     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     /*
796        Everyone has to participate in drawing the matrix since the graphics waits are
797        synchronized across all processors that share the PetscDraw object
798     */
799     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
800     if (!rank) {
801       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
802     }
803     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
804     ierr = MatDestroy(A);CHKERRQ(ierr);
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNC__
810 #define __FUNC__ "MatView_MPIAIJ"
811 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
812 {
813   int        ierr;
814   PetscTruth isascii,isdraw,issocket,isbinary;
815 
816   PetscFunctionBegin;
817   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
818   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
819   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
820   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
821   if (isascii || isdraw || isbinary || issocket) {
822     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
823   } else {
824     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
825   }
826   PetscFunctionReturn(0);
827 }
828 
829 /*
830     This has to provide several versions.
831 
832      2) a) use only local smoothing, updating outer values only once.
833         b) local smoothing, updating outer values each inner iteration
834      3) coloring, updating outer values between colors.
835 */
836 #undef __FUNC__
837 #define __FUNC__ "MatRelax_MPIAIJ"
838 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
839 {
840   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
841   Mat        AA = mat->A,BB = mat->B;
842   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
843   Scalar     *b,*x,*xs,*ls,d,*v,sum;
844   int        ierr,*idx,*diag;
845   int        n = matin->n,m = matin->m,i,shift = A->indexshift;
846 
847   PetscFunctionBegin;
848   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
849   diag = A->diag;
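  /*
     Only the local (on-process) SOR variants are supported: the ghost values gathered
     into mat->lvec are treated as constants during the sweeps, so the off-diagonal
     block contributes a fixed term to each row's update.
  */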
850   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
851     if (flag & SOR_ZERO_INITIAL_GUESS) {
852       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
853       PetscFunctionReturn(0);
854     }
855     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
856     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
857     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
858     if (xx != bb) {
859       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
860     } else {
861       b = x;
862     }
863     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
864     xs = x + shift; /* shift by one for index start of 1 */
865     ls = ls + shift;
866     while (its--) {
867       /* go down through the rows */
868       for (i=0; i<m; i++) {
869         n    = A->i[i+1] - A->i[i];
870 	PetscLogFlops(4*n+3);
871         idx  = A->j + A->i[i] + shift;
872         v    = A->a + A->i[i] + shift;
873         sum  = b[i];
874         SPARSEDENSEMDOT(sum,xs,v,idx,n);
875         d    = fshift + A->a[diag[i]+shift];
876         n    = B->i[i+1] - B->i[i];
877         idx  = B->j + B->i[i] + shift;
878         v    = B->a + B->i[i] + shift;
879         SPARSEDENSEMDOT(sum,ls,v,idx,n);
880         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
881       }
882       /* come up through the rows */
883       for (i=m-1; i>-1; i--) {
884         n    = A->i[i+1] - A->i[i];
885 	PetscLogFlops(4*n+3);
886         idx  = A->j + A->i[i] + shift;
887         v    = A->a + A->i[i] + shift;
888         sum  = b[i];
889         SPARSEDENSEMDOT(sum,xs,v,idx,n);
890         d    = fshift + A->a[diag[i]+shift];
891         n    = B->i[i+1] - B->i[i];
892         idx  = B->j + B->i[i] + shift;
893         v    = B->a + B->i[i] + shift;
894         SPARSEDENSEMDOT(sum,ls,v,idx,n);
895         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
896       }
897     }
898     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
899     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
900     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
901   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
902     if (flag & SOR_ZERO_INITIAL_GUESS) {
903       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     }
906     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
907     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
908     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
909     if (xx != bb) {
910       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
911     } else {
912       b = x;
913     }
914     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
915     xs = x + shift; /* shift by one for index start of 1 */
916     ls = ls + shift;
917     while (its--) {
918       for (i=0; i<m; i++) {
919         n    = A->i[i+1] - A->i[i];
920 	PetscLogFlops(4*n+3);
921         idx  = A->j + A->i[i] + shift;
922         v    = A->a + A->i[i] + shift;
923         sum  = b[i];
924         SPARSEDENSEMDOT(sum,xs,v,idx,n);
925         d    = fshift + A->a[diag[i]+shift];
926         n    = B->i[i+1] - B->i[i];
927         idx  = B->j + B->i[i] + shift;
928         v    = B->a + B->i[i] + shift;
929         SPARSEDENSEMDOT(sum,ls,v,idx,n);
930         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
931       }
932     }
933     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
934     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
935     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
936   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
937     if (flag & SOR_ZERO_INITIAL_GUESS) {
938       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
939       PetscFunctionReturn(0);
940     }
941     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
942     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
943     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
944     if (xx != bb) {
945       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
946     } else {
947       b = x;
948     }
949     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
950     xs = x + shift; /* shift by one for index start of 1 */
951     ls = ls + shift;
952     while (its--) {
953       for (i=m-1; i>-1; i--) {
954         n    = A->i[i+1] - A->i[i];
955 	PetscLogFlops(4*n+3);
956         idx  = A->j + A->i[i] + shift;
957         v    = A->a + A->i[i] + shift;
958         sum  = b[i];
959         SPARSEDENSEMDOT(sum,xs,v,idx,n);
960         d    = fshift + A->a[diag[i]+shift];
961         n    = B->i[i+1] - B->i[i];
962         idx  = B->j + B->i[i] + shift;
963         v    = B->a + B->i[i] + shift;
964         SPARSEDENSEMDOT(sum,ls,v,idx,n);
965         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
966       }
967     }
968     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
969     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
970     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
971   } else {
972     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNC__
978 #define __FUNC__ "MatGetInfo_MPIAIJ"
979 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
980 {
981   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
982   Mat        A = mat->A,B = mat->B;
983   int        ierr;
984   PetscReal  isend[5],irecv[5];
985 
986   PetscFunctionBegin;
987   info->block_size     = 1.0;
988   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
989   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
990   isend[3] = info->memory;  isend[4] = info->mallocs;
991   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
992   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
993   isend[3] += info->memory;  isend[4] += info->mallocs;
994   if (flag == MAT_LOCAL) {
995     info->nz_used      = isend[0];
996     info->nz_allocated = isend[1];
997     info->nz_unneeded  = isend[2];
998     info->memory       = isend[3];
999     info->mallocs      = isend[4];
1000   } else if (flag == MAT_GLOBAL_MAX) {
1001     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1002     info->nz_used      = irecv[0];
1003     info->nz_allocated = irecv[1];
1004     info->nz_unneeded  = irecv[2];
1005     info->memory       = irecv[3];
1006     info->mallocs      = irecv[4];
1007   } else if (flag == MAT_GLOBAL_SUM) {
1008     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1009     info->nz_used      = irecv[0];
1010     info->nz_allocated = irecv[1];
1011     info->nz_unneeded  = irecv[2];
1012     info->memory       = irecv[3];
1013     info->mallocs      = irecv[4];
1014   }
1015   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1016   info->fill_ratio_needed = 0;
1017   info->factor_mallocs    = 0;
1018   info->rows_global       = (double)matin->M;
1019   info->columns_global    = (double)matin->N;
1020   info->rows_local        = (double)matin->m;
1021   info->columns_local     = (double)matin->n;
1022 
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 #undef __FUNC__
1027 #define __FUNC__ "MatSetOption_MPIAIJ"
1028 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1029 {
1030   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1031   int        ierr;
1032 
1033   PetscFunctionBegin;
1034   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1035       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1036       op == MAT_COLUMNS_UNSORTED ||
1037       op == MAT_COLUMNS_SORTED ||
1038       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1039       op == MAT_KEEP_ZEROED_ROWS ||
1040       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1041       op == MAT_USE_INODES ||
1042       op == MAT_DO_NOT_USE_INODES ||
1043       op == MAT_IGNORE_ZERO_ENTRIES) {
1044         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1045         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1046   } else if (op == MAT_ROW_ORIENTED) {
1047     a->roworiented = PETSC_TRUE;
1048     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1049     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1050   } else if (op == MAT_ROWS_SORTED ||
1051              op == MAT_ROWS_UNSORTED ||
1052              op == MAT_SYMMETRIC ||
1053              op == MAT_STRUCTURALLY_SYMMETRIC ||
1054              op == MAT_YES_NEW_DIAGONALS) {
1055     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1056   } else if (op == MAT_COLUMN_ORIENTED) {
1057     a->roworiented = PETSC_FALSE;
1058     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1059     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1060   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1061     a->donotstash = PETSC_TRUE;
1062   } else if (op == MAT_NO_NEW_DIAGONALS){
1063     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1064   } else {
1065     SETERRQ(PETSC_ERR_SUP,"unknown option");
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNC__
1071 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1072 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1073 {
1074   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1075 
1076   PetscFunctionBegin;
1077   if (m) *m = mat->rstart;
1078   if (n) *n = mat->rend;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNC__
1083 #define __FUNC__ "MatGetRow_MPIAIJ"
1084 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1085 {
1086   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1087   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1088   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1089   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1090   int        *cmap,*idx_p;
1091 
1092   PetscFunctionBegin;
1093   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1094   mat->getrowactive = PETSC_TRUE;
1095 
1096   if (!mat->rowvalues && (idx || v)) {
1097     /*
1098         allocate enough space to hold information from the longest row.
1099     */
1100     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1101     int     max = 1,tmp;
1102     for (i=0; i<matin->m; i++) {
1103       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1104       if (max < tmp) { max = tmp; }
1105     }
1106     ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr);
1107     mat->rowindices = (int*)(mat->rowvalues + max);
1108   }
1109 
1110   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1111   lrow = row - rstart;
1112 
1113   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1114   if (!v)   {pvA = 0; pvB = 0;}
1115   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1116   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1117   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1118   nztot = nzA + nzB;
1119 
1120   cmap  = mat->garray;
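  /*
     Merge the two local rows into one row with globally numbered, increasing columns:
     entries of B whose global column lies below cstart come first, then all entries of
     A (offset by cstart), then the remaining entries of B.
  */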
1121   if (v  || idx) {
1122     if (nztot) {
1123       /* Sort by increasing column numbers, assuming A and B already sorted */
1124       int imark = -1;
1125       if (v) {
1126         *v = v_p = mat->rowvalues;
1127         for (i=0; i<nzB; i++) {
1128           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1129           else break;
1130         }
1131         imark = i;
1132         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1133         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1134       }
1135       if (idx) {
1136         *idx = idx_p = mat->rowindices;
1137         if (imark > -1) {
1138           for (i=0; i<imark; i++) {
1139             idx_p[i] = cmap[cworkB[i]];
1140           }
1141         } else {
1142           for (i=0; i<nzB; i++) {
1143             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1144             else break;
1145           }
1146           imark = i;
1147         }
1148         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1149         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1150       }
1151     } else {
1152       if (idx) *idx = 0;
1153       if (v)   *v   = 0;
1154     }
1155   }
1156   *nz = nztot;
1157   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1158   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNC__
1163 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1164 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1165 {
1166   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1167 
1168   PetscFunctionBegin;
1169   if (aij->getrowactive == PETSC_FALSE) {
1170     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1171   }
1172   aij->getrowactive = PETSC_FALSE;
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNC__
1177 #define __FUNC__ "MatNorm_MPIAIJ"
1178 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1179 {
1180   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1181   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1182   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1183   PetscReal  sum = 0.0;
1184   Scalar     *v;
1185 
1186   PetscFunctionBegin;
1187   if (aij->size == 1) {
1188     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1189   } else {
1190     if (type == NORM_FROBENIUS) {
1191       v = amat->a;
1192       for (i=0; i<amat->nz; i++) {
1193 #if defined(PETSC_USE_COMPLEX)
1194         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1195 #else
1196         sum += (*v)*(*v); v++;
1197 #endif
1198       }
1199       v = bmat->a;
1200       for (i=0; i<bmat->nz; i++) {
1201 #if defined(PETSC_USE_COMPLEX)
1202         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1203 #else
1204         sum += (*v)*(*v); v++;
1205 #endif
1206       }
1207       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1208       *norm = sqrt(*norm);
1209     } else if (type == NORM_1) { /* max column norm */
1210       PetscReal *tmp,*tmp2;
1211       int    *jj,*garray = aij->garray;
1212       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1213       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1214       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1215       *norm = 0.0;
1216       v = amat->a; jj = amat->j;
1217       for (j=0; j<amat->nz; j++) {
1218         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1219       }
1220       v = bmat->a; jj = bmat->j;
1221       for (j=0; j<bmat->nz; j++) {
1222         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1223       }
1224       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1225       for (j=0; j<mat->N; j++) {
1226         if (tmp2[j] > *norm) *norm = tmp2[j];
1227       }
1228       ierr = PetscFree(tmp);CHKERRQ(ierr);
1229       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1230     } else if (type == NORM_INFINITY) { /* max row norm */
1231       PetscReal ntemp = 0.0;
1232       for (j=0; j<aij->A->m; j++) {
1233         v = amat->a + amat->i[j] + shift;
1234         sum = 0.0;
1235         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1236           sum += PetscAbsScalar(*v); v++;
1237         }
1238         v = bmat->a + bmat->i[j] + shift;
1239         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1240           sum += PetscAbsScalar(*v); v++;
1241         }
1242         if (sum > ntemp) ntemp = sum;
1243       }
1244       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1245     } else {
1246       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1247     }
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNC__
1253 #define __FUNC__ "MatTranspose_MPIAIJ"
1254 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1255 {
1256   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1257   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1258   int        ierr,shift = Aloc->indexshift;
1259   int        M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1260   Mat        B;
1261   Scalar     *array;
1262 
1263   PetscFunctionBegin;
1264   if (!matout && M != N) {
1265     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1266   }
1267 
1268   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
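  /* The transpose is built entry by entry: each local row of A is inserted as a column
     of B, i.e. MatSetValues() is called with the row and column index sets swapped. */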
1269 
1270   /* copy over the A part */
1271   Aloc = (Mat_SeqAIJ*)a->A->data;
1272   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1273   row = a->rstart;
1274   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1275   for (i=0; i<m; i++) {
1276     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1277     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1278   }
1279   aj = Aloc->j;
1280   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1281 
1282   /* copy over the B part */
1283   Aloc = (Mat_SeqAIJ*)a->B->data;
1284   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1285   row  = a->rstart;
1286   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1287   ct   = cols;
1288   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1289   for (i=0; i<m; i++) {
1290     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1291     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1292   }
1293   ierr = PetscFree(ct);CHKERRQ(ierr);
1294   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1295   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296   if (matout) {
1297     *matout = B;
1298   } else {
1299     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNC__
1305 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1306 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1307 {
1308   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1309   Mat        a = aij->A,b = aij->B;
1310   int        ierr,s1,s2,s3;
1311 
1312   PetscFunctionBegin;
1313   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1314   if (rr) {
1315     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1316     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1317     /* Overlap communication with computation. */
1318     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1319   }
1320   if (ll) {
1321     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1322     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1323     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1324   }
1325   /* scale  the diagonal block */
1326   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1327 
1328   if (rr) {
1329     /* Do a scatter end and then right scale the off-diagonal block */
1330     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1331     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1332   }
1333 
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 
1338 #undef __FUNC__
1339 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1340 int MatPrintHelp_MPIAIJ(Mat A)
1341 {
1342   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1343   int        ierr;
1344 
1345   PetscFunctionBegin;
1346   if (!a->rank) {
1347     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNC__
1353 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1354 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1355 {
1356   PetscFunctionBegin;
1357   *bs = 1;
1358   PetscFunctionReturn(0);
1359 }
1360 #undef __FUNC__
1361 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1362 int MatSetUnfactored_MPIAIJ(Mat A)
1363 {
1364   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1365   int        ierr;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNC__
1373 #define __FUNC__ "MatEqual_MPIAIJ"
1374 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1375 {
1376   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1377   Mat        a,b,c,d;
1378   PetscTruth flg;
1379   int        ierr;
1380 
1381   PetscFunctionBegin;
1382   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1383   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1384   a = matA->A; b = matA->B;
1385   c = matB->A; d = matB->B;
1386 
1387   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1388   if (flg == PETSC_TRUE) {
1389     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1390   }
1391   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNC__
1396 #define __FUNC__ "MatCopy_MPIAIJ"
1397 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1398 {
1399   int        ierr;
1400   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1401   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1402   PetscTruth flg;
1403 
1404   PetscFunctionBegin;
1405   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1406   if (str != SAME_NONZERO_PATTERN || !flg) {
1407     /* Because of the column compression in the off-processor part of the matrix a->B,
1408        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1409        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1410        could be provided by first uncompressing the a->B matrices and then copying the
1411        submatrices. */
1412     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1413   } else {
1414     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1415     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNC__
1421 #define __FUNC__ "MatSetUpPreallocation_MPIAIJ"
1422 int MatSetUpPreallocation_MPIAIJ(Mat A)
1423 {
1424   int        ierr;
1425 
1426   PetscFunctionBegin;
1427   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1432 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1433 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1434 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1435 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1436 
1437 /* -------------------------------------------------------------------*/
1438 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1439        MatGetRow_MPIAIJ,
1440        MatRestoreRow_MPIAIJ,
1441        MatMult_MPIAIJ,
1442        MatMultAdd_MPIAIJ,
1443        MatMultTranspose_MPIAIJ,
1444        MatMultTransposeAdd_MPIAIJ,
1445        0,
1446        0,
1447        0,
1448        0,
1449        0,
1450        0,
1451        MatRelax_MPIAIJ,
1452        MatTranspose_MPIAIJ,
1453        MatGetInfo_MPIAIJ,
1454        MatEqual_MPIAIJ,
1455        MatGetDiagonal_MPIAIJ,
1456        MatDiagonalScale_MPIAIJ,
1457        MatNorm_MPIAIJ,
1458        MatAssemblyBegin_MPIAIJ,
1459        MatAssemblyEnd_MPIAIJ,
1460        0,
1461        MatSetOption_MPIAIJ,
1462        MatZeroEntries_MPIAIJ,
1463        MatZeroRows_MPIAIJ,
1464        0,
1465        0,
1466        0,
1467        0,
1468        MatSetUpPreallocation_MPIAIJ,
1469        0,
1470        MatGetOwnershipRange_MPIAIJ,
1471        0,
1472        0,
1473        0,
1474        0,
1475        MatDuplicate_MPIAIJ,
1476        0,
1477        0,
1478        0,
1479        0,
1480        0,
1481        MatGetSubMatrices_MPIAIJ,
1482        MatIncreaseOverlap_MPIAIJ,
1483        MatGetValues_MPIAIJ,
1484        MatCopy_MPIAIJ,
1485        MatPrintHelp_MPIAIJ,
1486        MatScale_MPIAIJ,
1487        0,
1488        0,
1489        0,
1490        MatGetBlockSize_MPIAIJ,
1491        0,
1492        0,
1493        0,
1494        0,
1495        MatFDColoringCreate_MPIAIJ,
1496        0,
1497        MatSetUnfactored_MPIAIJ,
1498        0,
1499        0,
1500        MatGetSubMatrix_MPIAIJ,
1501        MatDestroy_MPIAIJ,
1502        MatView_MPIAIJ,
1503        MatGetMaps_Petsc};
1504 
1505 /* ----------------------------------------------------------------------------------------*/
1506 
1507 EXTERN_C_BEGIN
1508 #undef __FUNC__
1509 #define __FUNC__ "MatStoreValues_MPIAIJ"
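/*
   Saves a copy of the current numerical values of both the local diagonal (A) and
   off-diagonal (B) blocks so that they can later be restored with
   MatRetrieveValues_MPIAIJ().
*/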
1510 int MatStoreValues_MPIAIJ(Mat mat)
1511 {
1512   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1513   int        ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1517   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 EXTERN_C_END
1521 
1522 EXTERN_C_BEGIN
1523 #undef __FUNC__
1524 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1525 int MatRetrieveValues_MPIAIJ(Mat mat)
1526 {
1527   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1528   int        ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1532   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1533   PetscFunctionReturn(0);
1534 }
1535 EXTERN_C_END
1536 
1537 #include "petscpc.h"
1538 EXTERN_C_BEGIN
1539 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1540 EXTERN_C_END
1541 
1542 EXTERN int MatUseXXT_MPIAIJ(Mat);
1543 EXTERN int MatUseXYT_MPIAIJ(Mat);
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNC__
1547 #define __FUNC__ "MatCreate_MPIAIJ"
1548 int MatCreate_MPIAIJ(Mat B)
1549 {
1550   Mat_MPIAIJ   *b;
1551   int          ierr,i,size;
1552   PetscTruth   flg;
1553 
1554   PetscFunctionBegin;
1555   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1556 
1557   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1558   B->data         = (void*)b;
1559   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1560   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1561   B->factor       = 0;
1562   B->assembled    = PETSC_FALSE;
1563   B->mapping      = 0;
1564 
1565   B->insertmode      = NOT_SET_VALUES;
1566   b->size            = size;
1567   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1568 
1569   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1570   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1571 
1572   /* the information in the maps duplicates the information computed below, eventually
1573      we should remove the duplicate information that is not contained in the maps */
1574   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1575   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1576 
1577   /* build local table of row and column ownerships */
1578   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1579   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1580   b->cowners = b->rowners + b->size + 2;
1581   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1582   b->rowners[0] = 0;
1583   for (i=2; i<=b->size; i++) {
1584     b->rowners[i] += b->rowners[i-1];
1585   }
1586   b->rstart = b->rowners[b->rank];
1587   b->rend   = b->rowners[b->rank+1];
1588   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1589   b->cowners[0] = 0;
1590   for (i=2; i<=b->size; i++) {
1591     b->cowners[i] += b->cowners[i-1];
1592   }
1593   b->cstart = b->cowners[b->rank];
1594   b->cend   = b->cowners[b->rank+1];
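
  /* after the prefix sums above, rowners[p] (and cowners[p]) give the first global
     row (column) owned by process p; hence this process owns rows rstart..rend-1
     and columns cstart..cend-1 */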
1595 
1596   /* build cache for off-processor entries formed during assembly */
1597   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1598   b->donotstash  = PETSC_FALSE;
1599   b->colmap      = 0;
1600   b->garray      = 0;
1601   b->roworiented = PETSC_TRUE;
1602 
1603   /* stuff used for matrix vector multiply */
1604   b->lvec      = PETSC_NULL;
1605   b->Mvctx     = PETSC_NULL;
1606 
1607   /* stuff for MatGetRow() */
1608   b->rowindices   = 0;
1609   b->rowvalues    = 0;
1610   b->getrowactive = PETSC_FALSE;
1611 
1612   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_mpiaij_xxt",&flg);CHKERRQ(ierr);
1613   if (flg) { ierr = MatUseXXT_MPIAIJ(B);CHKERRQ(ierr); }
1614   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_mpiaij_xyt",&flg);CHKERRQ(ierr);
1615   if (flg) { ierr = MatUseXYT_MPIAIJ(B);CHKERRQ(ierr); }
1616 
1617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1618                                      "MatStoreValues_MPIAIJ",
1619                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1621                                      "MatRetrieveValues_MPIAIJ",
1622                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1624                                      "MatGetDiagonalBlock_MPIAIJ",
1625                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNC__
1631 #define __FUNC__ "MatDuplicate_MPIAIJ"
1632 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1633 {
1634   Mat        mat;
1635   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1636   int        ierr;
1637 
1638   PetscFunctionBegin;
1639   *newmat       = 0;
1640   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1641   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1642   a    = (Mat_MPIAIJ*)mat->data;
1643   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1644   mat->factor       = matin->factor;
1645   mat->assembled    = PETSC_TRUE;
1646   mat->insertmode   = NOT_SET_VALUES;
1647   mat->preallocated = PETSC_TRUE;
1648 
1649   a->rstart       = oldmat->rstart;
1650   a->rend         = oldmat->rend;
1651   a->cstart       = oldmat->cstart;
1652   a->cend         = oldmat->cend;
1653   a->size         = oldmat->size;
1654   a->rank         = oldmat->rank;
1655   a->donotstash   = oldmat->donotstash;
1656   a->roworiented  = oldmat->roworiented;
1657   a->rowindices   = 0;
1658   a->rowvalues    = 0;
1659   a->getrowactive = PETSC_FALSE;
1660 
1661   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1662   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1663   if (oldmat->colmap) {
1664 #if defined (PETSC_USE_CTABLE)
1665     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1666 #else
1667     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1668     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1669     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1670 #endif
1671   } else a->colmap = 0;
1672   if (oldmat->garray) {
1673     int len;
1674     len  = oldmat->B->n;
1675     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1676     PetscLogObjectMemory(mat,len*sizeof(int));
1677     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1678   } else a->garray = 0;
1679 
1680   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1681   PetscLogObjectParent(mat,a->lvec);
1682   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1683   PetscLogObjectParent(mat,a->Mvctx);
1684   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1685   PetscLogObjectParent(mat,a->A);
1686   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1687   PetscLogObjectParent(mat,a->B);
1688   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1689   *newmat = mat;
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #include "petscsys.h"
1694 
1695 EXTERN_C_BEGIN
1696 #undef __FUNC__
1697 #define __FUNC__ "MatLoad_MPIAIJ"
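/*
   Loads a matrix stored in PETSc binary format. As read below, process 0 expects a
   four-int header (cookie, M, N, and a flag that must be nonnegative), then the M
   row lengths, then all the column indices, and finally all the numerical values;
   the indices and values belonging to the other processes are shipped to them.
*/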
1698 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1699 {
1700   Mat          A;
1701   Scalar       *vals,*svals;
1702   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1703   MPI_Status   status;
1704   int          i,nz,ierr,j,rstart,rend,fd;
1705   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1706   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1707   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1708 
1709   PetscFunctionBegin;
1710   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1711   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1712   if (!rank) {
1713     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1714     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1715     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1716     if (header[3] < 0) {
1717       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1718     }
1719   }
1720 
1721   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1722   M = header[1]; N = header[2];
1723   /* determine ownership of all rows */
1724   m = M/size + ((M % size) > rank);
1725   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1726   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1727   rowners[0] = 0;
1728   for (i=2; i<=size; i++) {
1729     rowners[i] += rowners[i-1];
1730   }
1731   rstart = rowners[rank];
1732   rend   = rowners[rank+1];
1733 
1734   /* distribute row lengths to all processors */
1735   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1736   offlens = ourlens + (rend-rstart);
1737   if (!rank) {
1738     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1739     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1740     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1741     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1742     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1743     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1744   } else {
1745     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1746   }
1747 
1748   if (!rank) {
1749     /* calculate the number of nonzeros on each processor */
1750     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1751     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1752     for (i=0; i<size; i++) {
1753       for (j=rowners[i]; j< rowners[i+1]; j++) {
1754         procsnz[i] += rowlengths[j];
1755       }
1756     }
1757     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1758 
1759     /* determine max buffer needed and allocate it */
1760     maxnz = 0;
1761     for (i=0; i<size; i++) {
1762       maxnz = PetscMax(maxnz,procsnz[i]);
1763     }
1764     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1765 
1766     /* read in my part of the matrix column indices  */
1767     nz   = procsnz[0];
1768     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1769     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1770 
1771     /* read in everyone else's part and ship it off */
1772     for (i=1; i<size; i++) {
1773       nz   = procsnz[i];
1774       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1775       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1776     }
1777     ierr = PetscFree(cols);CHKERRQ(ierr);
1778   } else {
1779     /* determine buffer space needed for message */
1780     nz = 0;
1781     for (i=0; i<m; i++) {
1782       nz += ourlens[i];
1783     }
1784     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1785 
1786     /* receive message of column indices */
1787     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1788     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1789     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1790   }
1791 
1792   /* determine column ownership if matrix is not square */
1793   if (N != M) {
1794     n      = N/size + ((N % size) > rank);
1795     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1796     cstart = cend - n;
1797   } else {
1798     cstart = rstart;
1799     cend   = rend;
1800     n      = cend - cstart;
1801   }
1802 
1803   /* loop over local rows, determining number of off-diagonal entries */
1804   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1805   jj = 0;
1806   for (i=0; i<m; i++) {
1807     for (j=0; j<ourlens[i]; j++) {
1808       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1809       jj++;
1810     }
1811   }
1812 
1813   /* create our matrix */
1814   for (i=0; i<m; i++) {
1815     ourlens[i] -= offlens[i];
1816   }
1817   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1818   A = *newmat;
1819   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1820   for (i=0; i<m; i++) {
1821     ourlens[i] += offlens[i];
1822   }
1823 
1824   if (!rank) {
1825     ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1826 
1827     /* read in my part of the matrix numerical values  */
1828     nz   = procsnz[0];
1829     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1830 
1831     /* insert into matrix */
1832     jj      = rstart;
1833     smycols = mycols;
1834     svals   = vals;
1835     for (i=0; i<m; i++) {
1836       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1837       smycols += ourlens[i];
1838       svals   += ourlens[i];
1839       jj++;
1840     }
1841 
1842     /* read in other processors and ship out */
1843     for (i=1; i<size; i++) {
1844       nz   = procsnz[i];
1845       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1846       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1847     }
1848     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1849   } else {
1850     /* receive numeric values */
1851     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr);
1852 
1853     /* receive message of values */
1854     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1855     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1856     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1857 
1858     /* insert into matrix */
1859     jj      = rstart;
1860     smycols = mycols;
1861     svals   = vals;
1862     for (i=0; i<m; i++) {
1863       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1864       smycols += ourlens[i];
1865       svals   += ourlens[i];
1866       jj++;
1867     }
1868   }
1869   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1870   ierr = PetscFree(vals);CHKERRQ(ierr);
1871   ierr = PetscFree(mycols);CHKERRQ(ierr);
1872   ierr = PetscFree(rowners);CHKERRQ(ierr);
1873 
1874   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1875   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 EXTERN_C_END
1879 
1880 #undef __FUNC__
1881 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1882 /*
1883     Not great since it makes two copies of the submatrix: first a sequential (SeqAIJ)
1884   copy on each process, and then the final result formed by concatenating the local
1885   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1886 */
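/*
    Note: csize is the number of local columns the new parallel matrix is to have
  (PETSC_DECIDE gives each process roughly n/size of them).  With MAT_REUSE_MATRIX the
  sequential submatrix composed on the previous result under the name "SubMatrix" is
  reused, so the matrix passed in must have been produced by an earlier
  MAT_INITIAL_MATRIX call.
*/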
1887 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1888 {
1889   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1890   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1891   Mat        *local,M,Mreuse;
1892   Scalar     *vwork,*aa;
1893   MPI_Comm   comm = mat->comm;
1894   Mat_SeqAIJ *aij;
1895 
1896 
1897   PetscFunctionBegin;
1898   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1900 
1901   if (call ==  MAT_REUSE_MATRIX) {
1902     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1903     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1904     local = &Mreuse;
1905     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1906   } else {
1907     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1908     Mreuse = *local;
1909     ierr = PetscFree(local);CHKERRQ(ierr);
1910   }
1911 
1912   /*
1913       m - number of local rows
1914       n - number of columns (same on all processors)
1915       rstart - first row in new global matrix generated
1916   */
1917   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1918   if (call == MAT_INITIAL_MATRIX) {
1919     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1920     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1921     ii  = aij->i;
1922     jj  = aij->j;
1923 
1924     /*
1925         Determine the number of non-zeros in the diagonal and off-diagonal
1926         portions of the matrix in order to do correct preallocation
1927     */
1928 
1929     /* first get start and end of "diagonal" columns */
1930     if (csize == PETSC_DECIDE) {
1931       nlocal = n/size + ((n % size) > rank);
1932     } else {
1933       nlocal = csize;
1934     }
1935     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1936     rstart = rend - nlocal;
1937     if (rank == size - 1 && rend != n) {
1938       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1939     }
1940 
1941     /* next, compute all the lengths */
1942     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1943     olens = dlens + m;
1944     for (i=0; i<m; i++) {
1945       jend = ii[i+1] - ii[i];
1946       olen = 0;
1947       dlen = 0;
1948       for (j=0; j<jend; j++) {
1949         if (*jj < rstart || *jj >= rend) olen++;
1950         else dlen++;
1951         jj++;
1952       }
1953       olens[i] = olen;
1954       dlens[i] = dlen;
1955     }
1956     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1957     ierr = PetscFree(dlens);CHKERRQ(ierr);
1958   } else {
1959     int ml,nl;
1960 
1961     M = *newmat;
1962     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1963     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1964     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1965     /*
1966          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1967        rather than the slower MatSetValues().
1968     */
1969     M->was_assembled = PETSC_TRUE;
1970     M->assembled     = PETSC_FALSE;
1971   }
1972   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1973   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1974   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1975   ii  = aij->i;
1976   jj  = aij->j;
1977   aa  = aij->a;
1978   for (i=0; i<m; i++) {
1979     row   = rstart + i;
1980     nz    = ii[i+1] - ii[i];
1981     cwork = jj;     jj += nz;
1982     vwork = aa;     aa += nz;
1983     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1984   }
1985 
1986   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   *newmat = M;
1989 
1990   /* save submatrix used in processor for next request */
1991   if (call ==  MAT_INITIAL_MATRIX) {
1992     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1993     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1994   }
1995 
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNC__
2000 #define __FUNC__ "MatMPIAIJSetPreallocation"
2001 /*@C
2002    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2003    (the default parallel PETSc format).  For good matrix assembly performance
2004    the user should preallocate the matrix storage by setting the parameters
2005    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2006    performance can be increased by more than a factor of 50.
2007 
2008    Collective on MPI_Comm
2009 
2010    Input Parameters:
2011 +  A - the matrix
2012 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2013            (same value is used for all local rows)
2014 .  d_nnz - array containing the number of nonzeros in the various rows of the
2015            DIAGONAL portion of the local submatrix (possibly different for each row)
2016            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2017            The size of this array is equal to the number of local rows, i.e 'm'.
2018            You must leave room for the diagonal entry even if it is zero.
2019 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2020            submatrix (same value is used for all local rows).
2021 -  o_nnz - array containing the number of nonzeros in the various rows of the
2022            OFF-DIAGONAL portion of the local submatrix (possibly different for
2023            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2024            structure. The size of this array is equal to the number
2025            of local rows, i.e 'm'.
2026 
2027    The AIJ format (also called the Yale sparse matrix format or
2028    compressed row storage), is fully compatible with standard Fortran 77
2029    storage.  That is, the stored row and column indices can begin at
2030    either one (as in Fortran) or zero.  See the users manual for details.
2031 
2032    The user MUST specify either the local or global matrix dimensions
2033    (possibly both).
2034 
2035    The parallel matrix is partitioned such that the first m0 rows belong to
2036    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2037    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
2038 
2039    The DIAGONAL portion of the local submatrix of a processor can be defined
2040    as the submatrix which is obtained by extracting the part corresponding
2041    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2042    first row that belongs to the processor, and r2 is the last row belonging
2043    to this processor. This is a square mxm matrix. The remaining portion
2044    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2045 
2046    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2047 
2048    By default, this format uses inodes (identical nodes) when possible.
2049    We search for consecutive rows with the same nonzero structure, thereby
2050    reusing matrix information to achieve increased efficiency.
2051 
2052    Options Database Keys:
2053 +  -mat_aij_no_inode  - Do not use inodes
2054 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2055 -  -mat_aij_oneindex - Internally use indexing starting at 1
2056         rather than 0.  Note that when calling MatSetValues(),
2057         the user still MUST index entries starting at 0!
2058 
2059    Example usage:
2060 
2061    Consider the following 8x8 matrix with 34 non-zero values, which is
2062    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
2063    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2064    as follows:
2065 
2066 .vb
2067             1  2  0  |  0  3  0  |  0  4
2068     Proc0   0  5  6  |  7  0  0  |  8  0
2069             9  0 10  | 11  0  0  | 12  0
2070     -------------------------------------
2071            13  0 14  | 15 16 17  |  0  0
2072     Proc1   0 18  0  | 19 20 21  |  0  0
2073             0  0  0  | 22 23  0  | 24  0
2074     -------------------------------------
2075     Proc2  25 26 27  |  0  0 28  | 29  0
2076            30  0  0  | 31 32 33  |  0 34
2077 .ve
2078 
2079    This can be represented as a collection of submatrices as:
2080 
2081 .vb
2082       A B C
2083       D E F
2084       G H I
2085 .ve
2086 
2087    Where the submatrices A,B,C are owned by proc0, D,E,F are
2088    owned by proc1, G,H,I are owned by proc2.
2089 
2090    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2091    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2092    The 'M','N' parameters are 8,8, and have the same values on all procs.
2093 
2094    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2095    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2096    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2097    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2098    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2099    matrix, and [DF] as another SeqAIJ matrix.
2100 
2101    When d_nz, o_nz parameters are specified, d_nz storage elements are
2102    allocated for every row of the local diagonal submatrix, and o_nz
2103    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2104    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
2105    any local row of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2106    In this case, the values of d_nz,o_nz are:
2107 .vb
2108      proc0 : d_nz = 2, o_nz = 2
2109      proc1 : d_nz = 3, o_nz = 2
2110      proc2 : d_nz = 1, o_nz = 4
2111 .ve
2112    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2113    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2114    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2115    34 values.
2116 
2117    When d_nnz, o_nnz parameters are specified, the storage is specified
2118    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2119    In the above case the values for d_nnz,o_nnz are:
2120 .vb
2121      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2122      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2123      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2124 .ve
2125    Here the space allocated is the sum of all the above values, i.e. 34, and
2126    hence pre-allocation is perfect.
2127 
2128    Level: intermediate
2129 
2130 .keywords: matrix, aij, compressed row, sparse, parallel
2131 
2132 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2133 @*/
2134 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2135 {
2136   Mat_MPIAIJ   *b;
2137   int          ierr,i;
2138   PetscTruth   flg2;
2139 
2140   PetscFunctionBegin;
2141   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2142   if (!flg2) PetscFunctionReturn(0);
2143   B->preallocated = PETSC_TRUE;
2144   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
2145   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
2146   if (d_nnz) {
2147     for (i=0; i<B->m; i++) {
2148       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2149     }
2150   }
2151   if (o_nnz) {
2152     for (i=0; i<B->m; i++) {
2153       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2154     }
2155   }
2156   b = (Mat_MPIAIJ*)B->data;
2157 
2158   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2159   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2160   PetscLogObjectParent(B,b->A);
2161   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2162   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2163   PetscLogObjectParent(B,b->B);
2164 
2165   PetscFunctionReturn(0);
2166 }
2167 
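/*
   A hedged usage sketch (not part of the original source): preallocating a matrix that
   has already been created and given the MPIAIJ type.  The sizes and nonzero counts
   below are illustrative assumptions only.

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Here 3 locations are preallocated per row of the DIAGONAL block and 2 per row of the
   OFF-DIAGONAL block; per-row arrays d_nnz/o_nnz of length m may be passed instead, in
   which case the scalar counts are ignored.
*/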
2168 #undef __FUNC__
2169 #define __FUNC__ "MatCreateMPIAIJ"
2170 /*@C
2171    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2172    (the default parallel PETSc format).  For good matrix assembly performance
2173    the user should preallocate the matrix storage by setting the parameters
2174    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2175    performance can be increased by more than a factor of 50.
2176 
2177    Collective on MPI_Comm
2178 
2179    Input Parameters:
2180 +  comm - MPI communicator
2181 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2182            This value should be the same as the local size used in creating the
2183            y vector for the matrix-vector product y = Ax.
2184 .  n - This value should be the same as the local size used in creating the
2185        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2186        calculated if N is given). For square matrices n is almost always m.
2187 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2188 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2189 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2190            (same value is used for all local rows)
2191 .  d_nnz - array containing the number of nonzeros in the various rows of the
2192            DIAGONAL portion of the local submatrix (possibly different for each row)
2193            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2194            The size of this array is equal to the number of local rows, i.e 'm'.
2195            You must leave room for the diagonal entry even if it is zero.
2196 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2197            submatrix (same value is used for all local rows).
2198 -  o_nnz - array containing the number of nonzeros in the various rows of the
2199            OFF-DIAGONAL portion of the local submatrix (possibly different for
2200            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2201            structure. The size of this array is equal to the number
2202            of local rows, i.e 'm'.
2203 
2204    Output Parameter:
2205 .  A - the matrix
2206 
2207    Notes:
2208    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2209    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2210    storage requirements for this matrix.
2211 
2212    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2213    processor then it must be used on all processors that share the object for
2214    that argument.
2215 
2216    The AIJ format (also called the Yale sparse matrix format or
2217    compressed row storage), is fully compatible with standard Fortran 77
2218    storage.  That is, the stored row and column indices can begin at
2219    either one (as in Fortran) or zero.  See the users manual for details.
2220 
2221    The user MUST specify either the local or global matrix dimensions
2222    (possibly both).
2223 
2224    The parallel matrix is partitioned such that the first m0 rows belong to
2225    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2226    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2227 
2228    The DIAGONAL portion of the local submatrix of a processor can be defined
2229    as the submatrix which is obtained by extracting the part corresponding
2230    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2231    first row that belongs to the processor, and r2 is the last row belonging
2232    to this processor. This is a square mxm matrix. The remaining portion
2233    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2234 
2235    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2236 
2237    By default, this format uses inodes (identical nodes) when possible.
2238    We search for consecutive rows with the same nonzero structure, thereby
2239    reusing matrix information to achieve increased efficiency.
2240 
2241    Options Database Keys:
2242 +  -mat_aij_no_inode  - Do not use inodes
2243 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2244 -  -mat_aij_oneindex - Internally use indexing starting at 1
2245         rather than 0.  Note that when calling MatSetValues(),
2246         the user still MUST index entries starting at 0!
2247 
2248 
2249    Example usage:
2250 
2251    Consider the following 8x8 matrix with 34 non-zero values, which is
2252    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
2253    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2254    as follows:
2255 
2256 .vb
2257             1  2  0  |  0  3  0  |  0  4
2258     Proc0   0  5  6  |  7  0  0  |  8  0
2259             9  0 10  | 11  0  0  | 12  0
2260     -------------------------------------
2261            13  0 14  | 15 16 17  |  0  0
2262     Proc1   0 18  0  | 19 20 21  |  0  0
2263             0  0  0  | 22 23  0  | 24  0
2264     -------------------------------------
2265     Proc2  25 26 27  |  0  0 28  | 29  0
2266            30  0  0  | 31 32 33  |  0 34
2267 .ve
2268 
2269    This can be represented as a collection of submatrices as:
2270 
2271 .vb
2272       A B C
2273       D E F
2274       G H I
2275 .ve
2276 
2277    Where the submatrices A,B,C are owned by proc0, D,E,F are
2278    owned by proc1, G,H,I are owned by proc2.
2279 
2280    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2281    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2282    The 'M','N' parameters are 8,8, and have the same values on all procs.
2283 
2284    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2285    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2286    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2287    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2288    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2289    matrix, and [DF] as another SeqAIJ matrix.
2290 
2291    When d_nz, o_nz parameters are specified, d_nz storage elements are
2292    allocated for every row of the local diagonal submatrix, and o_nz
2293    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2294    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
2295    any local row of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2296    In this case, the values of d_nz,o_nz are:
2297 .vb
2298      proc0 : d_nz = 2, o_nz = 2
2299      proc1 : d_nz = 3, o_nz = 2
2300      proc2 : d_nz = 1, o_nz = 4
2301 .ve
2302    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2303    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2304    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2305    34 values.
2306 
2307    When d_nnz, o_nnz parameters are specified, the storage is specified
2308    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2309    In the above case the values for d_nnz,o_nnz are:
2310 .vb
2311      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2312      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2313      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2314 .ve
2315    Here the space allocated is the sum of all the above values, i.e. 34, and
2316    hence pre-allocation is perfect.
2317 
2318    Level: intermediate
2319 
2320 .keywords: matrix, aij, compressed row, sparse, parallel
2321 
2322 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2323 @*/
2324 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2325 {
2326   int ierr,size;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2330   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2331   if (size > 1) {
2332     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2333     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2334   } else {
2335     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2336     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2337   }
2338   PetscFunctionReturn(0);
2339 }
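
/*
   A hedged usage sketch (not part of the original source), taken from the 8x8 example
   in the manual page above as seen from proc0 (which owns 3 rows and 3 "diagonal"
   columns); the arrays are illustrative assumptions only.

      Mat A;
      int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      ... insert entries with MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/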
2340 
2341 #undef __FUNC__
2342 #define __FUNC__ "MatMPIAIJGetSeqAIJ"
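/*
   Gives access to the two sequential matrices that make up an MPIAIJ matrix: Ad is the
   local DIAGONAL block, Ao is the local OFF-DIAGONAL block, and colmap maps the
   (compressed) local column numbers of Ao to the corresponding global column numbers.
*/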
2343 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2344 {
2345   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2346   PetscFunctionBegin;
2347   *Ad     = a->A;
2348   *Ao     = a->B;
2349   *colmap = a->garray;
2350   PetscFunctionReturn(0);
2351 }
2352