xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 273d9f13de75c4ed17021f7f2c11eebb99d26f0d)
/*$Id: mpiaij.c,v 1.321 2000/09/28 21:11:06 bsmith Exp bsmith $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
at a slightly higher hash-table lookup cost; without it, it is not scalable
(each processor holds an order-N integer array) but access is fast.
*/
#undef __FUNC__
#define __FUNC__ /*<a name="CreateColmap_MPIAIJ_Private"></a>*/"CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int*)PetscMalloc((mat->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

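/*
   Illustrative sketch (not compiled): how the colmap created above is
   typically consulted to translate a global column number gcol into a
   local column index in the off-diagonal block B.  A result of -1 means
   the column is not present in B on this processor.  The function name
   and variable names are hypothetical.
*/
#if 0
static int GlobalToLocalColumn_Sketch(Mat_MPIAIJ *aij,int gcol,int *lcol)
{
  int ierr;
#if defined (PETSC_USE_CTABLE)
  ierr  = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  *lcol = *lcol - 1;             /* the table stores local indices shifted by one */
#else
  *lcol = aij->colmap[gcol] - 1; /* a stored 0 means "column not present" */
#endif
  return 0;
}
#endif
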
#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in the row, therefore enlarge */ \
        int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
        new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr); \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1; \
      ap[_i] = value; \
      a_noinsert: ; \
      ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in the row, therefore enlarge */ \
        int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
        new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr); \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1; \
      ap[_i] = value; \
      b_noinsert: ; \
      bilen[row] = nrow; \
}

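/*
   Illustrative sketch (not compiled): the insertion logic that the two
   macros above implement, written out as a plain function and without the
   reallocation path.  rp[] holds the sorted column indices of one row,
   ap[] the corresponding values, and *nrow the current number of nonzeros
   in the row; the names are hypothetical.
*/
#if 0
static int RowInsert_Sketch(int *rp,Scalar *ap,int *nrow,int col,Scalar value,InsertMode addv)
{
  int t,low = 0,high = *nrow,_i,ii;

  while (high-low > 5) {           /* binary search narrows the range ... */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (_i=low; _i<high; _i++) {    /* ... and a short linear scan finishes */
    if (rp[_i] > col) break;
    if (rp[_i] == col) {           /* column already present: update in place */
      if (addv == ADD_VALUES) ap[_i] += value;
      else                    ap[_i]  = value;
      return 0;
    }
  }
  for (ii=*nrow-1; ii>=_i; ii--) { /* shift the later entries up by one */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[_i] = col; ap[_i] = value; (*nrow)++;
  return 0;
}
#endif
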
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Scalar     value;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;
  PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

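/*
   Illustrative sketch (not compiled): typical use of MatSetValues() on an
   MPIAIJ matrix.  Entries whose row is owned locally go directly into the
   A (diagonal) or B (off-diagonal) block above; rows owned by another
   processor are stashed and communicated during assembly.  The sizes and
   indices here are made up.
*/
#if 0
{
  Mat    M;
  int    ierr,row = 3,col = 7;
  Scalar one = 1.0;

  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                         5,PETSC_NULL,2,PETSC_NULL,&M);CHKERRQ(ierr);
  ierr = MatSetValues(M,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
  /* the stash is flushed here; every processor must call both routines */
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(M);CHKERRQ(ierr);
}
#endif
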
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES;
     since the two modes occupy different bits, a bitwise OR across the
     processors detects a mix */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cached values */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count the number of contributions going to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues of the rows going to
         the ith processor
  */
  svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the received row numbers into the local list, shifted to local numbering */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and thus trigger lots of mallocs
     and slow things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

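/*
   Illustrative sketch (not compiled): zeroing two globally numbered rows
   of an assembled MPIAIJ matrix M (hypothetical) and placing 1.0 on their
   diagonal.  Any processor may list any global row; the routine above
   first ships each row number to its owner.
*/
#if 0
{
  IS     is;
  int    ierr,rows[2] = {0,5};
  Scalar diag = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(M,is,&diag);CHKERRQ(ierr); /* pass 0 to leave the diagonal zeroed */
  ierr = ISDestroy(is);CHKERRQ(ierr);
}
#endif
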
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

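/*
   The multiply above computes, on each processor,

       y_local = A_diag * x_local + B_off * lvec

   where lvec gathers the ghost values of x corresponding to the
   (compressed) columns of the off-diagonal block B; the scatter that
   fills lvec is overlapped with the local product A_diag * x_local.
*/
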
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive the remote parts: this assumes the values are not actually
     added into yy until VecScatterEnd() below, which holds for the current
     scatter implementation but is not guaranteed in general */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive the remote parts: this assumes the values are not actually
     added into zz until VecScatterEnd() below, which holds for the current
     scatter implementation but is not guaranteed in general */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This works correctly only for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth  isdraw,isascii,flg;
  Viewer      sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                             rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                             rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color updating of outer values between colors.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        AA = mat->A,BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ*)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx,*diag;
  int        n = matin->n,m = matin->m,i,shift = A->indexshift;

  PetscFunctionBegin;
  if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      /* go down through the rows */
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come back up through the rows */
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

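/*
   For reference, each sweep above applies the standard (processor-local)
   SOR update

       x_i <- (1 - omega)*x_i + omega*(b_i - sum_{j != i} a_ij x_j)/(a_ii + fshift)

   sum accumulates b_i minus the full row dot product (SPARSEDENSEMDOT
   subtracts the products), the a_ii*x_i term adds the diagonal
   contribution back, and off-processor columns enter through the ghost
   values held in lvec.
*/
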
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_KEEP_ZEROED_ROWS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_USE_INODES ||
      op == MAT_DO_NOT_USE_INODES ||
      op == MAT_IGNORE_ZERO_ENTRIES) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int        *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues  = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

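/*
   Example of the merge performed above: with cstart = 10, local A-part
   columns {0,2} (global {10,12}) and B-part global columns {3,15}, the
   returned row is ordered {3,10,12,15} -- first the B entries below
   cstart, then all A entries, then the remaining B entries.
*/
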
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal  sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      tmp  = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp);
      tmp2 = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

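/*
   The norms computed above, with a_ij ranging over the global matrix:

       NORM_FROBENIUS:  sqrt(sum_ij |a_ij|^2)
       NORM_1:          max_j sum_i |a_ij|   (maximum column sum)
       NORM_INFINITY:   max_i sum_j |a_ij|   (maximum row sum)

   each is formed from local contributions followed by a single
   MPI_Allreduce().
*/
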
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols);
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1368 #undef __FUNC__
1369 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ"
1370 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1371 {
1372   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1373   Mat        a,b,c,d;
1374   PetscTruth flg;
1375   int        ierr;
1376 
1377   PetscFunctionBegin;
1378   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1379   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1380   a = matA->A; b = matA->B;
1381   c = matB->A; d = matB->B;
1382 
1383   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1384   if (flg == PETSC_TRUE) {
1385     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1386   }
1387   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
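/*
   Usage sketch (illustrative only): MatEqual() is collective; the
   MPI_Allreduce above ANDs the per-process comparisons of the diagonal
   and off-diagonal blocks, so every process receives the same answer.

     PetscTruth eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
     if (eq == PETSC_TRUE) { the matrices agree on every process }
*/
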
1391 #undef __FUNC__
1392 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ"
1393 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1394 {
1395   int        ierr;
1396   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1397   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1398   PetscTruth flg;
1399 
1400   PetscFunctionBegin;
1401   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1402   if (str != SAME_NONZERO_PATTERN || !flg) {
1403     /* because of the column compression in the off-processor part of the matrix a->B,
1404        the number of columns in a->B and b->B may be different, hence we cannot call
1405        MatCopy() directly on the two parts. If need be, a copy more efficient than
1406        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1407        then copying the submatrices */
1408     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1409   } else {
1410     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1411     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
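/*
   Usage sketch (illustrative only): the structure flag selects the path
   taken above; the block-wise copy requires identical nonzero patterns.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        fast path
     ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   MatCopy_Basic() path
*/
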
1416 #undef __FUNC__
1417 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIAIJ"></a>*/"MatSetUpPreallocation_MPIAIJ"
1418 int MatSetUpPreallocation_MPIAIJ(Mat A)
1419 {
1420   int        ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1428 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1429 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1430 EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1431 EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1432 
1433 /* -------------------------------------------------------------------*/
1434 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1435        MatGetRow_MPIAIJ,
1436        MatRestoreRow_MPIAIJ,
1437        MatMult_MPIAIJ,
1438        MatMultAdd_MPIAIJ,
1439        MatMultTranspose_MPIAIJ,
1440        MatMultTransposeAdd_MPIAIJ,
1441        0,
1442        0,
1443        0,
1444        0,
1445        0,
1446        0,
1447        MatRelax_MPIAIJ,
1448        MatTranspose_MPIAIJ,
1449        MatGetInfo_MPIAIJ,
1450        MatEqual_MPIAIJ,
1451        MatGetDiagonal_MPIAIJ,
1452        MatDiagonalScale_MPIAIJ,
1453        MatNorm_MPIAIJ,
1454        MatAssemblyBegin_MPIAIJ,
1455        MatAssemblyEnd_MPIAIJ,
1456        0,
1457        MatSetOption_MPIAIJ,
1458        MatZeroEntries_MPIAIJ,
1459        MatZeroRows_MPIAIJ,
1460        0,
1461        0,
1462        0,
1463        0,
1464        MatSetUpPreallocation_MPIAIJ,
1465        0,
1466        MatGetOwnershipRange_MPIAIJ,
1467        0,
1468        0,
1469        0,
1470        0,
1471        MatDuplicate_MPIAIJ,
1472        0,
1473        0,
1474        0,
1475        0,
1476        0,
1477        MatGetSubMatrices_MPIAIJ,
1478        MatIncreaseOverlap_MPIAIJ,
1479        MatGetValues_MPIAIJ,
1480        MatCopy_MPIAIJ,
1481        MatPrintHelp_MPIAIJ,
1482        MatScale_MPIAIJ,
1483        0,
1484        0,
1485        0,
1486        MatGetBlockSize_MPIAIJ,
1487        0,
1488        0,
1489        0,
1490        0,
1491        MatFDColoringCreate_MPIAIJ,
1492        0,
1493        MatSetUnfactored_MPIAIJ,
1494        0,
1495        0,
1496        MatGetSubMatrix_MPIAIJ,
1497        MatDestroy_MPIAIJ,
1498        MatView_MPIAIJ,
1499        MatGetMaps_Petsc};
1500 
1501 /* ----------------------------------------------------------------------------------------*/
1502 
1503 EXTERN_C_BEGIN
1504 #undef __FUNC__
1505 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ"
1506 int MatStoreValues_MPIAIJ(Mat mat)
1507 {
1508   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1509   int        ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1513   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 EXTERN_C_END
1517 
1518 EXTERN_C_BEGIN
1519 #undef __FUNC__
1520 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ"
1521 int MatRetrieveValues_MPIAIJ(Mat mat)
1522 {
1523   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1524   int        ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1528   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 EXTERN_C_END
1532 
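/*
   Usage sketch (illustrative only): the pair above saves and restores the
   numerical values of both the diagonal and off-diagonal blocks, so a
   matrix with a fixed nonzero pattern can be refilled repeatedly.

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify values ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/
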
1533 #include "petscpc.h"
1534 EXTERN_C_BEGIN
1535 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1536 EXTERN_C_END
1537 
1538 EXTERN_C_BEGIN
1539 #undef __FUNC__
1540 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIAIJ"
1541 int MatCreate_MPIAIJ(Mat B)
1542 {
1543   Mat_MPIAIJ   *b;
1544   int          ierr,i,size;
1545 
1546   PetscFunctionBegin;
1547   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1548 
1549   B->data         = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1550   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1551   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1552   B->factor       = 0;
1553   B->assembled    = PETSC_FALSE;
1554   B->mapping      = 0;
1555 
1556   B->insertmode      = NOT_SET_VALUES;
1557   b->size            = size;
1558   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1559 
1560   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1561   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1562 
1563   /* the information in the maps duplicates the ownership information computed below;
1564      eventually the duplicate information not contained in the maps should be removed */
1565   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1566   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1567 
1568   /* build local table of row and column ownerships */
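  /* (illustration: with three processes owning 3,3,2 rows, as in the 8x8
     example in the manual pages below, rowners = {0,3,6,8}; process 1 then
     has rstart = 3 and rend = 6) */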
1569   b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1570   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1571   b->cowners = b->rowners + b->size + 2;
1572   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1573   b->rowners[0] = 0;
1574   for (i=2; i<=b->size; i++) {
1575     b->rowners[i] += b->rowners[i-1];
1576   }
1577   b->rstart = b->rowners[b->rank];
1578   b->rend   = b->rowners[b->rank+1];
1579   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1580   b->cowners[0] = 0;
1581   for (i=2; i<=b->size; i++) {
1582     b->cowners[i] += b->cowners[i-1];
1583   }
1584   b->cstart = b->cowners[b->rank];
1585   b->cend   = b->cowners[b->rank+1];
1586 
1587   /* build stash used to cache matrix entries destined for other processors */
1588   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1589   b->donotstash  = PETSC_FALSE;
1590   b->colmap      = 0;
1591   b->garray      = 0;
1592   b->roworiented = PETSC_TRUE;
1593 
1594   /* stuff used for matrix-vector multiply */
1595   b->lvec      = PETSC_NULL;
1596   b->Mvctx     = PETSC_NULL;
1597 
1598   /* stuff for MatGetRow() */
1599   b->rowindices   = 0;
1600   b->rowvalues    = 0;
1601   b->getrowactive = PETSC_FALSE;
1602 
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1604                                      "MatStoreValues_MPIAIJ",
1605                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1607                                      "MatRetrieveValues_MPIAIJ",
1608                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1610                                      "MatGetDiagonalBlock_MPIAIJ",
1611                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1612   PetscFunctionReturn(0);
1613 }
1614 EXTERN_C_END
1615 
1616 #undef __FUNC__
1617 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ"
1618 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1619 {
1620   Mat        mat;
1621   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1622   int        ierr,len = 0;
1623 
1624   PetscFunctionBegin;
1625   *newmat       = 0;
1626   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1627   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1628   a    = (Mat_MPIAIJ*)mat->data;
1629   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1630   mat->factor       = matin->factor;
1631   mat->assembled    = PETSC_TRUE;
1632   mat->insertmode   = NOT_SET_VALUES;
1633   mat->preallocated = PETSC_TRUE;
1634 
1635   a->rstart       = oldmat->rstart;
1636   a->rend         = oldmat->rend;
1637   a->cstart       = oldmat->cstart;
1638   a->cend         = oldmat->cend;
1639   a->size         = oldmat->size;
1640   a->rank         = oldmat->rank;
1641   a->donotstash   = oldmat->donotstash;
1642   a->roworiented  = oldmat->roworiented;
1643   a->rowindices   = 0;
1644   a->rowvalues    = 0;
1645   a->getrowactive = PETSC_FALSE;
1646 
1647   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1648   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1649   if (oldmat->colmap) {
1650 #if defined (PETSC_USE_CTABLE)
1651     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1652 #else
1653     a->colmap = (int*)PetscMalloc((mat->N)*sizeof(int));CHKPTRQ(a->colmap);
1654     PLogObjectMemory(mat,(mat->N)*sizeof(int));
1655     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1656 #endif
1657   } else a->colmap = 0;
1658   if (oldmat->garray) {
1659     len = oldmat->B->n;
1660     a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1661     PLogObjectMemory(mat,len*sizeof(int));
1662     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1663   } else a->garray = 0;
1664 
1665   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1666   PLogObjectParent(mat,a->lvec);
1667   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1668   PLogObjectParent(mat,a->Mvctx);
1669   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1670   PLogObjectParent(mat,a->A);
1671   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1672   PLogObjectParent(mat,a->B);
1673   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1674   *newmat = mat;
1675   PetscFunctionReturn(0);
1676 }
1677 
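/*
   Usage sketch (illustrative only): duplicating a matrix with or without
   copying its numerical values.

     Mat Bdup;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Bdup);CHKERRQ(ierr);
     ierr = MatDestroy(Bdup);CHKERRQ(ierr);
*/
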
1678 #include "petscsys.h"
1679 
1680 EXTERN_C_BEGIN
1681 #undef __FUNC__
1682 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ"
1683 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1684 {
1685   Mat          A;
1686   Scalar       *vals,*svals;
1687   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1688   MPI_Status   status;
1689   int          i,nz,ierr,j,rstart,rend,fd;
1690   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1691   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1692   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1693 
1694   PetscFunctionBegin;
1695   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1696   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1697   if (!rank) {
1698     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1699     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1700     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1701     if (header[3] < 0) {
1702       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1703     }
1704   }
1705 
1706   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1707   M = header[1]; N = header[2];
1708   /* determine ownership of all rows */
1709   m = M/size + ((M % size) > rank);
1710   rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1711   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1712   rowners[0] = 0;
1713   for (i=2; i<=size; i++) {
1714     rowners[i] += rowners[i-1];
1715   }
1716   rstart = rowners[rank];
1717   rend   = rowners[rank+1];
1718 
1719   /* distribute row lengths to all processors */
1720   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
1721   offlens = ourlens + (rend-rstart);
1722   if (!rank) {
1723     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1724     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1725     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1726     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1727     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1728     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1729   } else {
1730     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1731   }
1732 
1733   if (!rank) {
1734     /* calculate the number of nonzeros on each processor */
1735     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
1736     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1737     for (i=0; i<size; i++) {
1738       for (j=rowners[i]; j< rowners[i+1]; j++) {
1739         procsnz[i] += rowlengths[j];
1740       }
1741     }
1742     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1743 
1744     /* determine max buffer needed and allocate it */
1745     maxnz = 0;
1746     for (i=0; i<size; i++) {
1747       maxnz = PetscMax(maxnz,procsnz[i]);
1748     }
1749     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
1750 
1751     /* read in my part of the matrix column indices */
1752     nz     = procsnz[0];
1753     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1754     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1755 
1756     /* read in everyone else's parts and ship them off */
1757     for (i=1; i<size; i++) {
1758       nz   = procsnz[i];
1759       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1760       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1761     }
1762     ierr = PetscFree(cols);CHKERRQ(ierr);
1763   } else {
1764     /* determine buffer space needed for message */
1765     nz = 0;
1766     for (i=0; i<m; i++) {
1767       nz += ourlens[i];
1768     }
1769     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1770 
1771     /* receive message of column indices */
1772     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1773     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1774     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1775   }
1776 
1777   /* determine column ownership if matrix is not square */
1778   if (N != M) {
1779     n      = N/size + ((N % size) > rank);
1780     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1781     cstart = cend - n;
1782   } else {
1783     cstart = rstart;
1784     cend   = rend;
1785     n      = cend - cstart;
1786   }
1787 
1788   /* loop over local rows, determining number of off-diagonal entries */
1789   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1790   jj = 0;
1791   for (i=0; i<m; i++) {
1792     for (j=0; j<ourlens[i]; j++) {
1793       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1794       jj++;
1795     }
1796   }
1797 
1798   /* create our matrix */
1799   for (i=0; i<m; i++) {
1800     ourlens[i] -= offlens[i];
1801   }
1802   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1803   A = *newmat;
1804   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1805   for (i=0; i<m; i++) {
1806     ourlens[i] += offlens[i];
1807   }
1808 
1809   if (!rank) {
1810     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
1811 
1812     /* read in my part of the matrix numerical values */
1813     nz   = procsnz[0];
1814     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1815 
1816     /* insert into matrix */
1817     jj      = rstart;
1818     smycols = mycols;
1819     svals   = vals;
1820     for (i=0; i<m; i++) {
1821       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1822       smycols += ourlens[i];
1823       svals   += ourlens[i];
1824       jj++;
1825     }
1826 
1827     /* read in other processors' parts and ship them out */
1828     for (i=1; i<size; i++) {
1829       nz   = procsnz[i];
1830       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1831       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1832     }
1833     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1834   } else {
1835     /* receive numeric values */
1836     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
1837 
1838     /* receive message of values */
1839     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1840     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1841     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1842 
1843     /* insert into matrix */
1844     jj      = rstart;
1845     smycols = mycols;
1846     svals   = vals;
1847     for (i=0; i<m; i++) {
1848       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1849       smycols += ourlens[i];
1850       svals   += ourlens[i];
1851       jj++;
1852     }
1853   }
1854   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1855   ierr = PetscFree(vals);CHKERRQ(ierr);
1856   ierr = PetscFree(mycols);CHKERRQ(ierr);
1857   ierr = PetscFree(rowners);CHKERRQ(ierr);
1858 
1859   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1860   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 EXTERN_C_END
1864 
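/*
   Usage sketch (illustrative only; the file name is hypothetical, and the
   binary Viewer calls are assumed from this release's interface): loading
   a parallel AIJ matrix from a PETSc binary file.

     Viewer viewer;
     Mat    A;
     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/
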
1865 #undef __FUNC__
1866 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ"
1867 /*
1868     Not great since it makes two copies of the submatrix: first a SeqAIJ
1869   copy on each process, and then the end result formed by concatenating
1870   the local copies. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1871 */
1872 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1873 {
1874   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1875   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1876   Mat        *local,M,Mreuse;
1877   Scalar     *vwork,*aa;
1878   MPI_Comm   comm = mat->comm;
1879   Mat_SeqAIJ *aij;
1880 
1881 
1882   PetscFunctionBegin;
1883   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1885 
1886   if (call ==  MAT_REUSE_MATRIX) {
1887     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1888     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1889     local = &Mreuse;
1890     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1891   } else {
1892     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1893     Mreuse = *local;
1894     ierr = PetscFree(local);CHKERRQ(ierr);
1895   }
1896 
1897   /*
1898       m - number of local rows
1899       n - number of columns (same on all processors)
1900       rstart - first row in new global matrix generated
1901   */
1902   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1903   if (call == MAT_INITIAL_MATRIX) {
1904     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1905     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1906     ii  = aij->i;
1907     jj  = aij->j;
1908 
1909     /*
1910         Determine the number of non-zeros in the diagonal and off-diagonal
1911         portions of the matrix in order to do correct preallocation
1912     */
1913 
1914     /* first get start and end of "diagonal" columns */
1915     if (csize == PETSC_DECIDE) {
1916       nlocal = n/size + ((n % size) > rank);
1917     } else {
1918       nlocal = csize;
1919     }
1920     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1921     rstart = rend - nlocal;
1922     if (rank == size - 1 && rend != n) {
1923       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1924     }
1925 
1926     /* next, compute all the lengths */
1927     dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
1928     olens = dlens + m;
1929     for (i=0; i<m; i++) {
1930       jend = ii[i+1] - ii[i];
1931       olen = 0;
1932       dlen = 0;
1933       for (j=0; j<jend; j++) {
1934         if (*jj < rstart || *jj >= rend) olen++;
1935         else dlen++;
1936         jj++;
1937       }
1938       olens[i] = olen;
1939       dlens[i] = dlen;
1940     }
1941     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1942     ierr = PetscFree(dlens);CHKERRQ(ierr);
1943   } else {
1944     int ml,nl;
1945 
1946     M = *newmat;
1947     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1948     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1949     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1950     /*
1951          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1952        rather than the slower MatSetValues().
1953     */
1954     M->was_assembled = PETSC_TRUE;
1955     M->assembled     = PETSC_FALSE;
1956   }
1957   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1958   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1959   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1960   ii  = aij->i;
1961   jj  = aij->j;
1962   aa  = aij->a;
1963   for (i=0; i<m; i++) {
1964     row   = rstart + i;
1965     nz    = ii[i+1] - ii[i];
1966     cwork = jj;     jj += nz;
1967     vwork = aa;     aa += nz;
1968     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1969   }
1970 
1971   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   *newmat = M;
1974 
1975   /* save the submatrix used on this processor for a later reuse request */
1976   if (call ==  MAT_INITIAL_MATRIX) {
1977     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1978     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1979   }
1980 
1981   PetscFunctionReturn(0);
1982 }
1983 
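/*
   Usage sketch (illustrative only; the index sets are hypothetical): each
   process supplies the rows and columns it wants in its own index sets;
   csize may be PETSC_DECIDE as handled above.

     IS  isrow,iscol;
     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     and later, with the same layout,
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
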
1984 #undef __FUNC__
1985 #define __FUNC__ /*<a name=""></a>*/"MatMPIAIJSetPreallocation"
1986 /*@C
1987    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
1988    (the default parallel PETSc format).  For good matrix assembly performance
1989    the user should preallocate the matrix storage by setting the parameters
1990    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1991    performance can be increased by more than a factor of 50.
1992 
1993    Collective on MPI_Comm
1994 
1995    Input Parameters:
1996 +  A - the matrix
1997 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1998            (same value is used for all local rows)
1999 .  d_nnz - array containing the number of nonzeros in the various rows of the
2000            DIAGONAL portion of the local submatrix (possibly different for each row)
2001            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2002            The size of this array is equal to the number of local rows, i.e. 'm'.
2003            You must leave room for the diagonal entry even if it is zero.
2004 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2005            submatrix (same value is used for all local rows).
2006 -  o_nnz - array containing the number of nonzeros in the various rows of the
2007            OFF-DIAGONAL portion of the local submatrix (possibly different for
2008            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2009            structure. The size of this array is equal to the number
2010            of local rows, i.e. 'm'.
2011 
2012    The AIJ format (also called the Yale sparse matrix format or
2013    compressed row storage) is fully compatible with standard Fortran 77
2014    storage.  That is, the stored row and column indices can begin at
2015    either one (as in Fortran) or zero.  See the users manual for details.
2016 
2017    The user MUST specify either the local or global matrix dimensions
2018    (possibly both).
2019 
2020    The parallel matrix is partitioned such that the first m0 rows belong to
2021    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2022    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2023 
2024    The DIAGONAL portion of the local submatrix of a processor can be defined
2025    as the submatrix which is obtained by extracting the part corresponding
2026    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2027    first row that belongs to the processor, and r2 is the last row belonging
2028    to this processor. This is a square mxm matrix. The remaining portion
2029    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2030 
2031    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2032 
2033    By default, this format uses inodes (identical nodes) when possible.
2034    We search for consecutive rows with the same nonzero structure, thereby
2035    reusing matrix information to achieve increased efficiency.
2036 
2037    Options Database Keys:
2038 +  -mat_aij_no_inode  - Do not use inodes
2039 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2040 -  -mat_aij_oneindex - Internally use indexing starting at 1
2041         rather than 0.  Note that when calling MatSetValues(),
2042         the user still MUST index entries starting at 0!
2043 
2044    Example usage:
2045 
2046    Consider the following 8x8 matrix with 34 non-zero values that is
2047    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2048    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2049    as follows:
2050 
2051 .vb
2052             1  2  0  |  0  3  0  |  0  4
2053     Proc0   0  5  6  |  7  0  0  |  8  0
2054             9  0 10  | 11  0  0  | 12  0
2055     -------------------------------------
2056            13  0 14  | 15 16 17  |  0  0
2057     Proc1   0 18  0  | 19 20 21  |  0  0
2058             0  0  0  | 22 23  0  | 24  0
2059     -------------------------------------
2060     Proc2  25 26 27  |  0  0 28  | 29  0
2061            30  0  0  | 31 32 33  |  0 34
2062 .ve
2063 
2064    This can be represented as a collection of submatrices as:
2065 
2066 .vb
2067       A B C
2068       D E F
2069       G H I
2070 .ve
2071 
2072    Where the submatrices A,B,C are owned by proc0, D,E,F are
2073    owned by proc1, G,H,I are owned by proc2.
2074 
2075    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2076    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2077    The 'M','N' parameters are 8,8, and have the same values on all procs.
2078 
2079    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2080    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2081    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2082    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2083    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2084    matrix, and [DF] as another SeqAIJ matrix.
2085 
2086    When d_nz, o_nz parameters are specified, d_nz storage elements are
2087    allocated for every row of the local diagonal submatrix, and o_nz
2088    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2089    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2090    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2091    In this case, the values of d_nz,o_nz are:
2092 .vb
2093      proc0 : d_nz = 2, o_nz = 2
2094      proc1 : d_nz = 3, o_nz = 2
2095      proc2 : d_nz = 1, o_nz = 4
2096 .ve
2097    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2098    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2099    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2100    34 values.
2101 
2102    When d_nnz, o_nnz parameters are specified, the storage is specified
2103    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2104    In the above case the values for d_nnz,o_nnz are:
2105 .vb
2106      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2107      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2108      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2109 .ve
2110    Here the space allocated is the sum of all the above values, i.e. 34, and
2111    hence the preallocation is exact.
2112 
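   For instance, proc0 above could supply its preallocation as follows (a
   sketch assuming the matrix A has already been created and given the
   MATMPIAIJ type; the d_nz and o_nz arguments are ignored here since the
   arrays are provided):
.vb
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
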
2113    Level: intermediate
2114 
2115 .keywords: matrix, aij, compressed row, sparse, parallel
2116 
2117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2118 @*/
2119 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2120 {
2121   Mat_MPIAIJ   *b;
2122   int          ierr,i;
2123   PetscTruth   flg2;
2124 
2125   PetscFunctionBegin;
2126   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2127   if (!flg2) PetscFunctionReturn(0);
2128   B->preallocated = PETSC_TRUE;
2129   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
2130   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
2131   if (d_nnz) {
2132     for (i=0; i<B->m; i++) {
2133       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2134     }
2135   }
2136   if (o_nnz) {
2137     for (i=0; i<B->m; i++) {
2138       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2139     }
2140   }
2141   b = (Mat_MPIAIJ*)B->data;
2142 
2143   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2144   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2145   PLogObjectParent(B,b->A);
2146   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2147   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2148   PLogObjectParent(B,b->B);
2149 
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNC__
2154 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ"
2155 /*@C
2156    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2157    (the default parallel PETSc format).  For good matrix assembly performance
2158    the user should preallocate the matrix storage by setting the parameters
2159    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2160    performance can be increased by more than a factor of 50.
2161 
2162    Collective on MPI_Comm
2163 
2164    Input Parameters:
2165 +  comm - MPI communicator
2166 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2167            This value should be the same as the local size used in creating the
2168            y vector for the matrix-vector product y = Ax.
2169 .  n - This value should be the same as the local size used in creating the
2170        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2171        calculated if N is given). For square matrices n is almost always m.
2172 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2173 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2174 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2175            (same value is used for all local rows)
2176 .  d_nnz - array containing the number of nonzeros in the various rows of the
2177            DIAGONAL portion of the local submatrix (possibly different for each row)
2178            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2179            The size of this array is equal to the number of local rows, i.e. 'm'.
2180            You must leave room for the diagonal entry even if it is zero.
2181 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2182            submatrix (same value is used for all local rows).
2183 -  o_nnz - array containing the number of nonzeros in the various rows of the
2184            OFF-DIAGONAL portion of the local submatrix (possibly different for
2185            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2186            structure. The size of this array is equal to the number
2187            of local rows, i.e. 'm'.
2188 
2189    Output Parameter:
2190 .  A - the matrix
2191 
2192    Notes:
2193    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2194    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2195    storage requirements for this matrix.
2196 
2197    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2198    processor then it must be used on all processors that share the object for
2199    that argument.
2200 
2201    The AIJ format (also called the Yale sparse matrix format or
2202    compressed row storage), is fully compatible with standard Fortran 77
2203    storage.  That is, the stored row and column indices can begin at
2204    either one (as in Fortran) or zero.  See the users manual for details.
2205 
2206    The user MUST specify either the local or global matrix dimensions
2207    (possibly both).
2208 
2209    The parallel matrix is partitioned such that the first m0 rows belong to
2210    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2211    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2212 
2213    The DIAGONAL portion of the local submatrix of a processor can be defined
2214    as the submatrix which is obtained by extracting the part corresponding
2215    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2216    first row that belongs to the processor, and r2 is the last row belonging
2217    to this processor. This is a square mxm matrix. The remaining portion
2218    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2219 
2220    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2221 
2222    By default, this format uses inodes (identical nodes) when possible.
2223    We search for consecutive rows with the same nonzero structure, thereby
2224    reusing matrix information to achieve increased efficiency.
2225 
2226    Options Database Keys:
2227 +  -mat_aij_no_inode  - Do not use inodes
2228 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2229 -  -mat_aij_oneindex - Internally use indexing starting at 1
2230         rather than 0.  Note that when calling MatSetValues(),
2231         the user still MUST index entries starting at 0!
2232 
2233 
2234    Example usage:
2235 
2236    Consider the following 8x8 matrix with 34 non-zero values that is
2237    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2238    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2239    as follows:
2240 
2241 .vb
2242             1  2  0  |  0  3  0  |  0  4
2243     Proc0   0  5  6  |  7  0  0  |  8  0
2244             9  0 10  | 11  0  0  | 12  0
2245     -------------------------------------
2246            13  0 14  | 15 16 17  |  0  0
2247     Proc1   0 18  0  | 19 20 21  |  0  0
2248             0  0  0  | 22 23  0  | 24  0
2249     -------------------------------------
2250     Proc2  25 26 27  |  0  0 28  | 29  0
2251            30  0  0  | 31 32 33  |  0 34
2252 .ve
2253 
2254    This can be represented as a collection of submatrices as:
2255 
2256 .vb
2257       A B C
2258       D E F
2259       G H I
2260 .ve
2261 
2262    Where the submatrices A,B,C are owned by proc0, D,E,F are
2263    owned by proc1, G,H,I are owned by proc2.
2264 
2265    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2266    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2267    The 'M','N' parameters are 8,8, and have the same values on all procs.
2268 
2269    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2270    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2271    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2272    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2273    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2274    matrix, and [DF] as another SeqAIJ matrix.
2275 
2276    When d_nz, o_nz parameters are specified, d_nz storage elements are
2277    allocated for every row of the local diagonal submatrix, and o_nz
2278    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2279    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2280    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2281    In this case, the values of d_nz,o_nz are:
2282 .vb
2283      proc0 : d_nz = 2, o_nz = 2
2284      proc1 : d_nz = 3, o_nz = 2
2285      proc2 : d_nz = 1, o_nz = 4
2286 .ve
2287    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2288    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2289    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2290    34 values.
2291 
2292    When d_nnz, o_nnz parameters are specified, the storage is specified
2293    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2294    In the above case the values for d_nnz,o_nnz are:
2295 .vb
2296      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2297      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2298      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2299 .ve
2300    Here the space allocated is the sum of all the above values, i.e. 34, and
2301    hence the preallocation is exact.
2302 
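   For instance, proc0 above could create its share of the matrix as follows
   (a sketch; error handling and the surrounding program are assumed):
.vb
     Mat A;
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
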
2303    Level: intermediate
2304 
2305 .keywords: matrix, aij, compressed row, sparse, parallel
2306 
2307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2308 @*/
2309 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2310 {
2311   int ierr,size;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2315   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2316   if (size > 1) {
2317     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2318     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2319   } else {
2320     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2321     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2322   }
2323   PetscFunctionReturn(0);
2324 }
2325