xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 35d8aa7f675fa4ee5c80aa031e65d3ef307a4891)
1 /*$Id: mpiaij.c,v 1.322 2000/10/24 20:25:36 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array) but access is fast.
20 */
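/*
   For example (a sketch): to translate a global column number gcol into its local
   index lcol in the off-diagonal block B, one would do roughly

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   where lcol < 0 means gcol does not occur in the off-diagonal part.  This mirrors
   the lookups done in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
*/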
21 #undef __FUNC__
22 #define __FUNC__ /*<a name="CreateColmap_MPIAIJ_Private"></a>*/"CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   aij->colmap = (int*)PetscMalloc((mat->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
36   PLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
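/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B)
   SeqAIJ block without a function call: a short binary search narrows the search range
   within the row, a linear scan locates the column, and the value is added or inserted
   in place according to addv.  If the row has no free slot, the storage is enlarged by
   CHUNKSIZE entries: new i/j/a arrays are allocated in a single malloc, the old data is
   copied over, and later entries in the row are shifted up to make room for the new
   nonzero.
*/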
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         Scalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
76         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         Scalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
150         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNC__
193 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
197   Scalar     value;
198   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int        cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth roworiented = aij->roworiented;
201 
202   /* Some Variables required in the macro */
203   Mat        A = aij->A;
204   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
205   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   Scalar     *aa = a->a;
207   PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat        B = aij->B;
209   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
210   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   Scalar     *ba = b->a;
212 
213   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int        nonew = a->nonew,shift = a->indexshift;
215   Scalar     *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
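      /* The row is owned by another process: cache the values in the matrix stash;
         they are communicated to the owning process during MatAssemblyBegin()/
         MatAssemblyEnd(), unless MAT_IGNORE_OFF_PROC_ENTRIES was set. */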
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNC__
279 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNC__
322 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
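  /* (A bitwise OR of the InsertMode values across all processes equals
     ADD_VALUES|INSERT_VALUES only when some processes inserted while others added;
     this assumes the two enum values have disjoint nonzero bits.) */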
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNC__
349 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   Scalar      *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled, if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled thus we can skip this stuff
385   */
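  /* (was_assembled is 0 or 1 on each process, so the MPI_PROD reduction below is 1
     only if every process is still in the assembled state; a zero result indicates
     that at least one process has disassembled, and this process must follow suit.) */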
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNC__
407 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
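  /*
     Overall plan: each process determines which of the requested rows it owns and
     which belong to other processes, sends the foreign row indices to their owners
     (via MPI_Isend/MPI_Irecv), collects the row indices it was sent, and then zeros
     its local rows, optionally placing diag on the diagonal.
  */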
439   /*  first count number of contributors to each processor */
440   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
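  /* (After the reduction, work[rank] holds the maximum number of rows any single
     process will send here, which is used to size the receive buffers, and
     work[size+rank] holds how many processes will send a message here.) */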
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
465   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
475   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
476   starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry,
533      which would allocate new memory with many mallocs and slow things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
562     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr        = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
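/*
   Parallel multiply y = A x: start the scatter of the needed off-process entries of x
   into a->lvec, multiply by the diagonal block while the communication proceeds,
   finish the scatter, then add in the contribution of the off-diagonal block applied
   to the ghost values.
*/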
571 #undef __FUNC__
572 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNC__
606 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for my implementation*/
621   /* but is not perhaps always true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNC__
627 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in zz until the next line, which is true for my implementation*/
642   /* but is not perhaps always true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNC__
652 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNC__
668 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(Scalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNC__
681 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNC__
709 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
711 {
712   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
714   int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth  isdraw,isascii,flg;
716   Viewer      sviewer;
717 
718   PetscFunctionBegin;
719   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
720   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
721   if (isascii) {
722     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
723     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
724       MatInfo info;
725       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
726       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
727       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
728       if (flg) {
729         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
730 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
731       } else {
732         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
733 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
734       }
735       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
737       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
738       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
739       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
740       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
741       PetscFunctionReturn(0);
742     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
743       PetscFunctionReturn(0);
744     }
745   } else if (isdraw) {
746     Draw       draw;
747     PetscTruth isnull;
748     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
749     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
750   }
751 
752   if (size == 1) {
753     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
754   } else {
755     /* assemble the entire matrix onto first processor. */
756     Mat         A;
757     Mat_SeqAIJ *Aloc;
758     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
759     Scalar      *a;
760 
761     if (!rank) {
762       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
763     } else {
764       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     }
766     PLogObjectParent(mat,A);
767 
768     /* copy over the A part */
769     Aloc = (Mat_SeqAIJ*)aij->A->data;
770     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
771     row = aij->rstart;
772     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
773     for (i=0; i<m; i++) {
774       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
775       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
776     }
777     aj = Aloc->j;
778     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
779 
780     /* copy over the B part */
781     Aloc = (Mat_SeqAIJ*)aij->B->data;
782     m = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
783     row = aij->rstart;
784     ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
785     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
786     for (i=0; i<m; i++) {
787       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
788       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
789     }
790     ierr = PetscFree(ct);CHKERRQ(ierr);
791     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
792     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
793     /*
794        Everyone has to make this call to draw the matrix since the graphics waits are
795        synchronized across all processors that share the Draw object
796     */
797     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
798     if (!rank) {
799       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
800     }
801     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
802     ierr = MatDestroy(A);CHKERRQ(ierr);
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNC__
808 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
809 int MatView_MPIAIJ(Mat mat,Viewer viewer)
810 {
811   int        ierr;
812   PetscTruth isascii,isdraw,issocket,isbinary;
813 
814   PetscFunctionBegin;
815   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
816   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
817   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
818   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
819   if (isascii || isdraw || isbinary || issocket) {
820     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
821   } else {
822     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
823   }
824   PetscFunctionReturn(0);
825 }
826 
827 /*
828     This has to provide several versions.
829 
830      2) a) use only local smoothing updating outer values only once.
831         b) local smoothing updating outer values each inner iteration
832      3) color, updating outer values between colors.
833 */
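/*
   Each local sweep below applies, row by row,
       x_i <- (1-omega) x_i + omega (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift),
   where the sum runs over both the diagonal block A and the off-diagonal block B
   (using the ghost values gathered into mat->lvec).  SPARSEDENSEMDOT is assumed to
   subtract the sparse row dot product from `sum'; the diagonal term is added back
   explicitly before dividing.
*/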
834 #undef __FUNC__
835 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ"
836 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
837 {
838   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
839   Mat        AA = mat->A,BB = mat->B;
840   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
841   Scalar     *b,*x,*xs,*ls,d,*v,sum;
842   int        ierr,*idx,*diag;
843   int        n = matin->n,m = matin->m,i,shift = A->indexshift;
844 
845   PetscFunctionBegin;
846   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
847   diag = A->diag;
848   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
849     if (flag & SOR_ZERO_INITIAL_GUESS) {
850       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
851       PetscFunctionReturn(0);
852     }
853     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
854     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
855     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856     if (xx != bb) {
857       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
858     } else {
859       b = x;
860     }
861     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
862     xs = x + shift; /* shift by one for index start of 1 */
863     ls = ls + shift;
864     while (its--) {
865       /* go down through the rows */
866       for (i=0; i<m; i++) {
867         n    = A->i[i+1] - A->i[i];
868 	PLogFlops(4*n+3);
869         idx  = A->j + A->i[i] + shift;
870         v    = A->a + A->i[i] + shift;
871         sum  = b[i];
872         SPARSEDENSEMDOT(sum,xs,v,idx,n);
873         d    = fshift + A->a[diag[i]+shift];
874         n    = B->i[i+1] - B->i[i];
875         idx  = B->j + B->i[i] + shift;
876         v    = B->a + B->i[i] + shift;
877         SPARSEDENSEMDOT(sum,ls,v,idx,n);
878         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
879       }
880       /* come up through the rows */
881       for (i=m-1; i>-1; i--) {
882         n    = A->i[i+1] - A->i[i];
883 	PLogFlops(4*n+3);
884         idx  = A->j + A->i[i] + shift;
885         v    = A->a + A->i[i] + shift;
886         sum  = b[i];
887         SPARSEDENSEMDOT(sum,xs,v,idx,n);
888         d    = fshift + A->a[diag[i]+shift];
889         n    = B->i[i+1] - B->i[i];
890         idx  = B->j + B->i[i] + shift;
891         v    = B->a + B->i[i] + shift;
892         SPARSEDENSEMDOT(sum,ls,v,idx,n);
893         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
894       }
895     }
896     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
897     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
898     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
899   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
900     if (flag & SOR_ZERO_INITIAL_GUESS) {
901       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     }
904     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
905     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
906     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
907     if (xx != bb) {
908       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
909     } else {
910       b = x;
911     }
912     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
913     xs = x + shift; /* shift by one for index start of 1 */
914     ls = ls + shift;
915     while (its--) {
916       for (i=0; i<m; i++) {
917         n    = A->i[i+1] - A->i[i];
918 	PLogFlops(4*n+3);
919         idx  = A->j + A->i[i] + shift;
920         v    = A->a + A->i[i] + shift;
921         sum  = b[i];
922         SPARSEDENSEMDOT(sum,xs,v,idx,n);
923         d    = fshift + A->a[diag[i]+shift];
924         n    = B->i[i+1] - B->i[i];
925         idx  = B->j + B->i[i] + shift;
926         v    = B->a + B->i[i] + shift;
927         SPARSEDENSEMDOT(sum,ls,v,idx,n);
928         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
929       }
930     }
931     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
932     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
933     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
934   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
935     if (flag & SOR_ZERO_INITIAL_GUESS) {
936       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
937       PetscFunctionReturn(0);
938     }
939     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
940     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
941     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
942     if (xx != bb) {
943       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
944     } else {
945       b = x;
946     }
947     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
948     xs = x + shift; /* shift by one for index start of 1 */
949     ls = ls + shift;
950     while (its--) {
951       for (i=m-1; i>-1; i--) {
952         n    = A->i[i+1] - A->i[i];
953 	PLogFlops(4*n+3);
954         idx  = A->j + A->i[i] + shift;
955         v    = A->a + A->i[i] + shift;
956         sum  = b[i];
957         SPARSEDENSEMDOT(sum,xs,v,idx,n);
958         d    = fshift + A->a[diag[i]+shift];
959         n    = B->i[i+1] - B->i[i];
960         idx  = B->j + B->i[i] + shift;
961         v    = B->a + B->i[i] + shift;
962         SPARSEDENSEMDOT(sum,ls,v,idx,n);
963         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
964       }
965     }
966     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
967     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
968     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
969   } else {
970     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNC__
976 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ"
977 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
978 {
979   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
980   Mat        A = mat->A,B = mat->B;
981   int        ierr;
982   PetscReal  isend[5],irecv[5];
983 
984   PetscFunctionBegin;
985   info->block_size     = 1.0;
986   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
987   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
988   isend[3] = info->memory;  isend[4] = info->mallocs;
989   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
990   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
991   isend[3] += info->memory;  isend[4] += info->mallocs;
992   if (flag == MAT_LOCAL) {
993     info->nz_used      = isend[0];
994     info->nz_allocated = isend[1];
995     info->nz_unneeded  = isend[2];
996     info->memory       = isend[3];
997     info->mallocs      = isend[4];
998   } else if (flag == MAT_GLOBAL_MAX) {
999     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1000     info->nz_used      = irecv[0];
1001     info->nz_allocated = irecv[1];
1002     info->nz_unneeded  = irecv[2];
1003     info->memory       = irecv[3];
1004     info->mallocs      = irecv[4];
1005   } else if (flag == MAT_GLOBAL_SUM) {
1006     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1007     info->nz_used      = irecv[0];
1008     info->nz_allocated = irecv[1];
1009     info->nz_unneeded  = irecv[2];
1010     info->memory       = irecv[3];
1011     info->mallocs      = irecv[4];
1012   }
1013   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1014   info->fill_ratio_needed = 0;
1015   info->factor_mallocs    = 0;
1016   info->rows_global       = (double)matin->M;
1017   info->columns_global    = (double)matin->N;
1018   info->rows_local        = (double)matin->m;
1019   info->columns_local     = (double)matin->n;
1020 
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNC__
1025 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ"
1026 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1027 {
1028   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1029   int        ierr;
1030 
1031   PetscFunctionBegin;
1032   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1033       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1034       op == MAT_COLUMNS_UNSORTED ||
1035       op == MAT_COLUMNS_SORTED ||
1036       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1037       op == MAT_KEEP_ZEROED_ROWS ||
1038       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1039       op == MAT_USE_INODES ||
1040       op == MAT_DO_NOT_USE_INODES ||
1041       op == MAT_IGNORE_ZERO_ENTRIES) {
1042         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1043         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1044   } else if (op == MAT_ROW_ORIENTED) {
1045     a->roworiented = PETSC_TRUE;
1046     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1047     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1048   } else if (op == MAT_ROWS_SORTED ||
1049              op == MAT_ROWS_UNSORTED ||
1050              op == MAT_SYMMETRIC ||
1051              op == MAT_STRUCTURALLY_SYMMETRIC ||
1052              op == MAT_YES_NEW_DIAGONALS) {
1053     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1054   } else if (op == MAT_COLUMN_ORIENTED) {
1055     a->roworiented = PETSC_FALSE;
1056     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1057     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1058   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1059     a->donotstash = PETSC_TRUE;
1060   } else if (op == MAT_NO_NEW_DIAGONALS){
1061     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1062   } else {
1063     SETERRQ(PETSC_ERR_SUP,"unknown option");
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNC__
1069 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ"
1070 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1071 {
1072   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1073 
1074   PetscFunctionBegin;
1075   if (m) *m = mat->rstart;
1076   if (n) *n = mat->rend;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNC__
1081 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ"
1082 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1083 {
1084   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1085   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1086   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1087   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1088   int        *cmap,*idx_p;
1089 
1090   PetscFunctionBegin;
1091   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1092   mat->getrowactive = PETSC_TRUE;
1093 
1094   if (!mat->rowvalues && (idx || v)) {
1095     /*
1096         allocate enough space to hold information from the longest row.
1097     */
1098     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1099     int     max = 1,tmp;
1100     for (i=0; i<matin->m; i++) {
1101       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1102       if (max < tmp) { max = tmp; }
1103     }
1104     mat->rowvalues  = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1105     mat->rowindices = (int*)(mat->rowvalues + max);
1106   }
1107 
1108   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1109   lrow = row - rstart;
1110 
1111   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1112   if (!v)   {pvA = 0; pvB = 0;}
1113   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1114   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1115   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1116   nztot = nzA + nzB;
1117 
1118   cmap  = mat->garray;
1119   if (v  || idx) {
1120     if (nztot) {
1121       /* Sort by increasing column numbers, assuming A and B already sorted */
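      /* Three-way merge: the B (off-diagonal) columns whose global index is below
         cstart come first, then all of the A (diagonal block) columns offset by
         cstart, then the remaining B columns; imark records where the B part was
         split. */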
1122       int imark = -1;
1123       if (v) {
1124         *v = v_p = mat->rowvalues;
1125         for (i=0; i<nzB; i++) {
1126           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1127           else break;
1128         }
1129         imark = i;
1130         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1131         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1132       }
1133       if (idx) {
1134         *idx = idx_p = mat->rowindices;
1135         if (imark > -1) {
1136           for (i=0; i<imark; i++) {
1137             idx_p[i] = cmap[cworkB[i]];
1138           }
1139         } else {
1140           for (i=0; i<nzB; i++) {
1141             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1142             else break;
1143           }
1144           imark = i;
1145         }
1146         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1147         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1148       }
1149     } else {
1150       if (idx) *idx = 0;
1151       if (v)   *v   = 0;
1152     }
1153   }
1154   *nz = nztot;
1155   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1156   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNC__
1161 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ"
1162 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1163 {
1164   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1165 
1166   PetscFunctionBegin;
1167   if (aij->getrowactive == PETSC_FALSE) {
1168     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1169   }
1170   aij->getrowactive = PETSC_FALSE;
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNC__
1175 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ"
1176 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1177 {
1178   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1179   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1180   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1181   PetscReal  sum = 0.0;
1182   Scalar     *v;
1183 
1184   PetscFunctionBegin;
1185   if (aij->size == 1) {
1186     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1187   } else {
1188     if (type == NORM_FROBENIUS) {
1189       v = amat->a;
1190       for (i=0; i<amat->nz; i++) {
1191 #if defined(PETSC_USE_COMPLEX)
1192         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1193 #else
1194         sum += (*v)*(*v); v++;
1195 #endif
1196       }
1197       v = bmat->a;
1198       for (i=0; i<bmat->nz; i++) {
1199 #if defined(PETSC_USE_COMPLEX)
1200         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1201 #else
1202         sum += (*v)*(*v); v++;
1203 #endif
1204       }
1205       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1206       *norm = sqrt(*norm);
1207     } else if (type == NORM_1) { /* max column norm */
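      /* ||A||_1 = max_j sum_i |a_ij|: each process accumulates the absolute values of
         its entries into a length-N array indexed by global column, the arrays are
         summed across processes with MPI_Allreduce, and the largest column sum is
         taken. */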
1208       PetscReal *tmp,*tmp2;
1209       int    *jj,*garray = aij->garray;
1210       tmp  = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp);
1211       tmp2 = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2);
1212       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1213       *norm = 0.0;
1214       v = amat->a; jj = amat->j;
1215       for (j=0; j<amat->nz; j++) {
1216         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1217       }
1218       v = bmat->a; jj = bmat->j;
1219       for (j=0; j<bmat->nz; j++) {
1220         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1221       }
1222       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1223       for (j=0; j<mat->N; j++) {
1224         if (tmp2[j] > *norm) *norm = tmp2[j];
1225       }
1226       ierr = PetscFree(tmp);CHKERRQ(ierr);
1227       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1228     } else if (type == NORM_INFINITY) { /* max row norm */
1229       PetscReal ntemp = 0.0;
1230       for (j=0; j<aij->A->m; j++) {
1231         v = amat->a + amat->i[j] + shift;
1232         sum = 0.0;
1233         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1234           sum += PetscAbsScalar(*v); v++;
1235         }
1236         v = bmat->a + bmat->i[j] + shift;
1237         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1238           sum += PetscAbsScalar(*v); v++;
1239         }
1240         if (sum > ntemp) ntemp = sum;
1241       }
1242       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1243     } else {
1244       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1245     }
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNC__
1251 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ"
1252 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1253 {
1254   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1255   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1256   int        ierr,shift = Aloc->indexshift;
1257   int        M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1258   Mat        B;
1259   Scalar     *array;
1260 
1261   PetscFunctionBegin;
1262   if (!matout && M != N) {
1263     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1264   }
1265 
1266   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1267 
1268   /* copy over the A part */
1269   Aloc = (Mat_SeqAIJ*)a->A->data;
1270   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1271   row = a->rstart;
1272   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1273   for (i=0; i<m; i++) {
1274     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1275     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1276   }
1277   aj = Aloc->j;
1278   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1279 
1280   /* copy over the B part */
1281   Aloc = (Mat_SeqAIJ*)a->B->data;
1282   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1283   row = a->rstart;
1284   ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols);
1285   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1286   for (i=0; i<m; i++) {
1287     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1288     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1289   }
1290   ierr = PetscFree(ct);CHKERRQ(ierr);
1291   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1292   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1293   if (matout) {
1294     *matout = B;
1295   } else {
1296     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNC__
1302 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ"
1303 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1304 {
1305   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1306   Mat        a = aij->A,b = aij->B;
1307   int        ierr,s1,s2,s3;
1308 
1309   PetscFunctionBegin;
1310   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1311   if (rr) {
1312     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1313     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1314     /* Overlap communication with computation. */
1315     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1316   }
1317   if (ll) {
1318     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1319     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1320     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1321   }
1322   /* scale  the diagonal block */
1323   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1324 
1325   if (rr) {
1326     /* Do a scatter end and then right scale the off-diagonal block */
1327     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1328     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1329   }
1330 
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 
1335 #undef __FUNC__
1336 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ"
1337 int MatPrintHelp_MPIAIJ(Mat A)
1338 {
1339   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1340   int        ierr;
1341 
1342   PetscFunctionBegin;
1343   if (!a->rank) {
1344     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNC__
1350 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ"
1351 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1352 {
1353   PetscFunctionBegin;
1354   *bs = 1;
1355   PetscFunctionReturn(0);
1356 }
1357 #undef __FUNC__
1358 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ"
1359 int MatSetUnfactored_MPIAIJ(Mat A)
1360 {
1361   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1362   int        ierr;
1363 
1364   PetscFunctionBegin;
1365   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNC__
1370 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ"
1371 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1372 {
1373   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1374   Mat        a,b,c,d;
1375   PetscTruth flg;
1376   int        ierr;
1377 
1378   PetscFunctionBegin;
1379   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1380   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1381   a = matA->A; b = matA->B;
1382   c = matB->A; d = matB->B;
1383 
1384   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1385   if (flg == PETSC_TRUE) {
1386     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1387   }
1388   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNC__
1393 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ"
1394 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1395 {
1396   int        ierr;
1397   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1398   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1399   PetscTruth flg;
1400 
1401   PetscFunctionBegin;
1402   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1403   if (str != SAME_NONZERO_PATTERN || !flg) {
1404     /* because of the column compression in the off-processor part of the matrix a->B,
1405        the number of columns in a->B and b->B may be different, hence we cannot call
1406        MatCopy() directly on the two parts. If need be, a copy more efficient than
1407        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1408        then copying the submatrices */
1409     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1410   } else {
1411     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1412     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
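
/*
   Usage sketch (illustrative only): the part-wise path above requires that B already
   be an MPIAIJ matrix with the same nonzero pattern as A, which is most easily
   guaranteed by duplicating A first; any other B falls back to MatCopy_Basic().
   MAT_DO_NOT_COPY_VALUES is assumed to be the MatDuplicateOption that copies only
   the nonzero structure.

     Mat B;
     int ierr;

     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/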
1416 
1417 #undef __FUNC__
1418 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIAIJ"></a>*/"MatSetUpPreallocation_MPIAIJ"
1419 int MatSetUpPreallocation_MPIAIJ(Mat A)
1420 {
1421   int        ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1429 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1430 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1431 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1432 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1433 
1434 /* -------------------------------------------------------------------*/
1435 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1436        MatGetRow_MPIAIJ,
1437        MatRestoreRow_MPIAIJ,
1438        MatMult_MPIAIJ,
1439        MatMultAdd_MPIAIJ,
1440        MatMultTranspose_MPIAIJ,
1441        MatMultTransposeAdd_MPIAIJ,
1442        0,
1443        0,
1444        0,
1445        0,
1446        0,
1447        0,
1448        MatRelax_MPIAIJ,
1449        MatTranspose_MPIAIJ,
1450        MatGetInfo_MPIAIJ,
1451        MatEqual_MPIAIJ,
1452        MatGetDiagonal_MPIAIJ,
1453        MatDiagonalScale_MPIAIJ,
1454        MatNorm_MPIAIJ,
1455        MatAssemblyBegin_MPIAIJ,
1456        MatAssemblyEnd_MPIAIJ,
1457        0,
1458        MatSetOption_MPIAIJ,
1459        MatZeroEntries_MPIAIJ,
1460        MatZeroRows_MPIAIJ,
1461        0,
1462        0,
1463        0,
1464        0,
1465        MatSetUpPreallocation_MPIAIJ,
1466        0,
1467        MatGetOwnershipRange_MPIAIJ,
1468        0,
1469        0,
1470        0,
1471        0,
1472        MatDuplicate_MPIAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477        0,
1478        MatGetSubMatrices_MPIAIJ,
1479        MatIncreaseOverlap_MPIAIJ,
1480        MatGetValues_MPIAIJ,
1481        MatCopy_MPIAIJ,
1482        MatPrintHelp_MPIAIJ,
1483        MatScale_MPIAIJ,
1484        0,
1485        0,
1486        0,
1487        MatGetBlockSize_MPIAIJ,
1488        0,
1489        0,
1490        0,
1491        0,
1492        MatFDColoringCreate_MPIAIJ,
1493        0,
1494        MatSetUnfactored_MPIAIJ,
1495        0,
1496        0,
1497        MatGetSubMatrix_MPIAIJ,
1498        MatDestroy_MPIAIJ,
1499        MatView_MPIAIJ,
1500        MatGetMaps_Petsc};
1501 
1502 /* ----------------------------------------------------------------------------------------*/
1503 
1504 EXTERN_C_BEGIN
1505 #undef __FUNC__
1506 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ"
1507 int MatStoreValues_MPIAIJ(Mat mat)
1508 {
1509   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1510   int        ierr;
1511 
1512   PetscFunctionBegin;
1513   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1514   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 EXTERN_C_END
1518 
1519 EXTERN_C_BEGIN
1520 #undef __FUNC__
1521 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ"
1522 int MatRetrieveValues_MPIAIJ(Mat mat)
1523 {
1524   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1525   int        ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1529   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 EXTERN_C_END
1533 
1534 #include "petscpc.h"
1535 EXTERN_C_BEGIN
1536 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1537 EXTERN_C_END
1538 
1539 EXTERN_C_BEGIN
1540 #undef __FUNC__
1541 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIAIJ"
1542 int MatCreate_MPIAIJ(Mat B)
1543 {
1544   Mat_MPIAIJ   *b;
1545   int          ierr,i,size;
1546 
1547   PetscFunctionBegin;
1548   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1549 
1550   B->data         = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1551   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1552   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1553   B->factor       = 0;
1554   B->assembled    = PETSC_FALSE;
1555   B->mapping      = 0;
1556 
1557   B->insertmode      = NOT_SET_VALUES;
1558   b->size            = size;
1559   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1560 
1561   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1562   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1563 
1564   /* the information in the maps duplicates the information computed below, eventually
1565      we should remove the duplicate information that is not contained in the maps */
1566   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1567   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1568 
1569   /* build local table of row and column ownerships */
1570   b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1571   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1572   b->cowners = b->rowners + b->size + 2;
1573   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1574   b->rowners[0] = 0;
1575   for (i=2; i<=b->size; i++) {
1576     b->rowners[i] += b->rowners[i-1];
1577   }
1578   b->rstart = b->rowners[b->rank];
1579   b->rend   = b->rowners[b->rank+1];
1580   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1581   b->cowners[0] = 0;
1582   for (i=2; i<=b->size; i++) {
1583     b->cowners[i] += b->cowners[i-1];
1584   }
1585   b->cstart = b->cowners[b->rank];
1586   b->cend   = b->cowners[b->rank+1];
1587 
1588   /* build cache for off array entries formed */
1589   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1590   b->donotstash  = PETSC_FALSE;
1591   b->colmap      = 0;
1592   b->garray      = 0;
1593   b->roworiented = PETSC_TRUE;
1594 
1595   /* stuff used for matrix vector multiply */
1596   b->lvec      = PETSC_NULL;
1597   b->Mvctx     = PETSC_NULL;
1598 
1599   /* stuff for MatGetRow() */
1600   b->rowindices   = 0;
1601   b->rowvalues    = 0;
1602   b->getrowactive = PETSC_FALSE;
1603 
1604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1605                                      "MatStoreValues_MPIAIJ",
1606                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1607   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1608                                      "MatRetrieveValues_MPIAIJ",
1609                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1611                                      "MatGetDiagonalBlock_MPIAIJ",
1612                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1613   PetscFunctionReturn(0);
1614 }
1615 EXTERN_C_END
1616 
1617 #undef __FUNC__
1618 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ"
1619 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1620 {
1621   Mat        mat;
1622   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1623   int        ierr,len = 0;
1624 
1625   PetscFunctionBegin;
1626   *newmat       = 0;
1627   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1628   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1629   a    = (Mat_MPIAIJ*)mat->data;
1630   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1631   mat->factor       = matin->factor;
1632   mat->assembled    = PETSC_TRUE;
1633   mat->insertmode   = NOT_SET_VALUES;
1634   mat->preallocated = PETSC_TRUE;
1635 
1636   a->rstart       = oldmat->rstart;
1637   a->rend         = oldmat->rend;
1638   a->cstart       = oldmat->cstart;
1639   a->cend         = oldmat->cend;
1640   a->size         = oldmat->size;
1641   a->rank         = oldmat->rank;
1642   a->donotstash   = oldmat->donotstash;
1643   a->roworiented  = oldmat->roworiented;
1644   a->rowindices   = 0;
1645   a->rowvalues    = 0;
1646   a->getrowactive = PETSC_FALSE;
1647 
1648   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1649   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1650   if (oldmat->colmap) {
1651 #if defined (PETSC_USE_CTABLE)
1652     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1653 #else
1654     a->colmap = (int*)PetscMalloc((mat->N)*sizeof(int));CHKPTRQ(a->colmap);
1655     PLogObjectMemory(mat,(mat->N)*sizeof(int));
1656     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1657 #endif
1658   } else a->colmap = 0;
1659   if (oldmat->garray) {
1660     len = oldmat->B->n;
1661     a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1662     PLogObjectMemory(mat,len*sizeof(int));
1663     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1664   } else a->garray = 0;
1665 
1666   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1667   PLogObjectParent(mat,a->lvec);
1668   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1669   PLogObjectParent(mat,a->Mvctx);
1670   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1671   PLogObjectParent(mat,a->A);
1672   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1673   PLogObjectParent(mat,a->B);
1674   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1675   *newmat = mat;
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #include "petscsys.h"
1680 
1681 EXTERN_C_BEGIN
1682 #undef __FUNC__
1683 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ"
1684 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1685 {
1686   Mat          A;
1687   Scalar       *vals,*svals;
1688   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1689   MPI_Status   status;
1690   int          i,nz,ierr,j,rstart,rend,fd;
1691   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1692   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1693   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1694 
1695   PetscFunctionBegin;
1696   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1697   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1698   if (!rank) {
1699     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1700     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1701     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1702     if (header[3] < 0) {
1703       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1704     }
1705   }
1706 
1707   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1708   M = header[1]; N = header[2];
1709   /* determine ownership of all rows */
1710   m = M/size + ((M % size) > rank);
1711   rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1712   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1713   rowners[0] = 0;
1714   for (i=2; i<=size; i++) {
1715     rowners[i] += rowners[i-1];
1716   }
1717   rstart = rowners[rank];
1718   rend   = rowners[rank+1];
1719 
1720   /* distribute row lengths to all processors */
1721   ourlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(ourlens);
1722   offlens = ourlens + (rend-rstart);
1723   if (!rank) {
1724     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1725     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1726     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1727     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1728     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1729     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1730   } else {
1731     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1732   }
1733 
1734   if (!rank) {
1735     /* calculate the number of nonzeros on each processor */
1736     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
1737     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1738     for (i=0; i<size; i++) {
1739       for (j=rowners[i]; j< rowners[i+1]; j++) {
1740         procsnz[i] += rowlengths[j];
1741       }
1742     }
1743     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1744 
1745     /* determine max buffer needed and allocate it */
1746     maxnz = 0;
1747     for (i=0; i<size; i++) {
1748       maxnz = PetscMax(maxnz,procsnz[i]);
1749     }
1750     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
1751 
1752     /* read in my part of the matrix column indices  */
1753     nz     = procsnz[0];
1754     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1755     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1756 
1757     /* read in everyone else's part and ship it off */
1758     for (i=1; i<size; i++) {
1759       nz   = procsnz[i];
1760       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1761       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1762     }
1763     ierr = PetscFree(cols);CHKERRQ(ierr);
1764   } else {
1765     /* determine buffer space needed for message */
1766     nz = 0;
1767     for (i=0; i<m; i++) {
1768       nz += ourlens[i];
1769     }
1770     mycols = (int*)PetscMalloc((nz+1)*sizeof(int));CHKPTRQ(mycols);
1771 
1772     /* receive message of column indices*/
1773     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1774     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1775     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1776   }
1777 
1778   /* determine column ownership if matrix is not square */
1779   if (N != M) {
1780     n      = N/size + ((N % size) > rank);
1781     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1782     cstart = cend - n;
1783   } else {
1784     cstart = rstart;
1785     cend   = rend;
1786     n      = cend - cstart;
1787   }
1788 
1789   /* loop over local rows, determining number of off diagonal entries */
1790   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1791   jj = 0;
1792   for (i=0; i<m; i++) {
1793     for (j=0; j<ourlens[i]; j++) {
1794       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1795       jj++;
1796     }
1797   }
1798 
1799   /* create our matrix */
1800   for (i=0; i<m; i++) {
1801     ourlens[i] -= offlens[i];
1802   }
1803   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1804   A = *newmat;
1805   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1806   for (i=0; i<m; i++) {
1807     ourlens[i] += offlens[i];
1808   }
1809 
1810   if (!rank) {
1811     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
1812 
1813     /* read in my part of the matrix numerical values  */
1814     nz   = procsnz[0];
1815     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1816 
1817     /* insert into matrix */
1818     jj      = rstart;
1819     smycols = mycols;
1820     svals   = vals;
1821     for (i=0; i<m; i++) {
1822       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1823       smycols += ourlens[i];
1824       svals   += ourlens[i];
1825       jj++;
1826     }
1827 
1828     /* read in other processors and ship out */
1829     for (i=1; i<size; i++) {
1830       nz   = procsnz[i];
1831       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1832       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1833     }
1834     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1835   } else {
1836     /* receive numeric values */
1837     vals = (Scalar*)PetscMalloc((nz+1)*sizeof(Scalar));CHKPTRQ(vals);
1838 
1839     /* receive message of values*/
1840     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1841     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1842     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1843 
1844     /* insert into matrix */
1845     jj      = rstart;
1846     smycols = mycols;
1847     svals   = vals;
1848     for (i=0; i<m; i++) {
1849       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1850       smycols += ourlens[i];
1851       svals   += ourlens[i];
1852       jj++;
1853     }
1854   }
1855   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1856   ierr = PetscFree(vals);CHKERRQ(ierr);
1857   ierr = PetscFree(mycols);CHKERRQ(ierr);
1858   ierr = PetscFree(rowners);CHKERRQ(ierr);
1859 
1860   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1861   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 EXTERN_C_END
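
/*
   Usage sketch (illustrative only): loading a parallel AIJ matrix from a PETSc binary
   file previously written with MatView().  The file name is a placeholder, and
   ViewerBinaryOpen() with BINARY_RDONLY is assumed to be the binary-viewer interface
   of this PETSc vintage.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/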
1865 
1866 #undef __FUNC__
1867 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ"
1868 /*
1869     Not ideal, since it makes two copies of the submatrix: first a local SeqAIJ
1870   copy on each process, then the final parallel result formed by concatenating the
1871   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1872 */
1873 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1874 {
1875   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1876   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1877   Mat        *local,M,Mreuse;
1878   Scalar     *vwork,*aa;
1879   MPI_Comm   comm = mat->comm;
1880   Mat_SeqAIJ *aij;
1881 
1882 
1883   PetscFunctionBegin;
1884   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1886 
1887   if (call ==  MAT_REUSE_MATRIX) {
1888     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1889     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1890     local = &Mreuse;
1891     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1892   } else {
1893     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1894     Mreuse = *local;
1895     ierr = PetscFree(local);CHKERRQ(ierr);
1896   }
1897 
1898   /*
1899       m - number of local rows
1900       n - number of columns (same on all processors)
1901       rstart - first row in new global matrix generated
1902   */
1903   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1904   if (call == MAT_INITIAL_MATRIX) {
1905     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1906     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1907     ii  = aij->i;
1908     jj  = aij->j;
1909 
1910     /*
1911         Determine the number of non-zeros in the diagonal and off-diagonal
1912         portions of the matrix in order to do correct preallocation
1913     */
1914 
1915     /* first get start and end of "diagonal" columns */
1916     if (csize == PETSC_DECIDE) {
1917       nlocal = n/size + ((n % size) > rank);
1918     } else {
1919       nlocal = csize;
1920     }
1921     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1922     rstart = rend - nlocal;
1923     if (rank == size - 1 && rend != n) {
1924       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1925     }
1926 
1927     /* next, compute all the lengths */
1928     dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
1929     olens = dlens + m;
1930     for (i=0; i<m; i++) {
1931       jend = ii[i+1] - ii[i];
1932       olen = 0;
1933       dlen = 0;
1934       for (j=0; j<jend; j++) {
1935         if (*jj < rstart || *jj >= rend) olen++;
1936         else dlen++;
1937         jj++;
1938       }
1939       olens[i] = olen;
1940       dlens[i] = dlen;
1941     }
1942     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1943     ierr = PetscFree(dlens);CHKERRQ(ierr);
1944   } else {
1945     int ml,nl;
1946 
1947     M = *newmat;
1948     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1949     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1950     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1951     /*
1952          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1953        rather than the slower MatSetValues().
1954     */
1955     M->was_assembled = PETSC_TRUE;
1956     M->assembled     = PETSC_FALSE;
1957   }
1958   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1959   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1960   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1961   ii  = aij->i;
1962   jj  = aij->j;
1963   aa  = aij->a;
1964   for (i=0; i<m; i++) {
1965     row   = rstart + i;
1966     nz    = ii[i+1] - ii[i];
1967     cwork = jj;     jj += nz;
1968     vwork = aa;     aa += nz;
1969     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1970   }
1971 
1972   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1974   *newmat = M;
1975 
1976   /* save submatrix used in processor for next request */
1977   if (call ==  MAT_INITIAL_MATRIX) {
1978     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1979     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1980   }
1981 
1982   PetscFunctionReturn(0);
1983 }
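
/*
   Usage sketch (illustrative only): extracting a parallel submatrix.  Here each
   process requests its locally owned rows and, identically on every process, all
   columns, so the result is simply a redistributed copy of A; other (suitably
   chosen) index sets work the same way.  A second call with MAT_REUSE_MATRIX reuses
   the "SubMatrix" object cached on the result by the first call.

     IS  isrow,iscol;
     Mat sub;
     int ierr,rstart,rend,M,N;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,N,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/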
1984 
1985 #undef __FUNC__
1986 #define __FUNC__ /*<a name=""></a>*/"MatMPIAIJSetPreallocation"
1987 /*@C
1988    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
1989    (the default parallel PETSc format).  For good matrix assembly performance
1990    the user should preallocate the matrix storage by setting the parameters
1991    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1992    performance can be increased by more than a factor of 50.
1993 
1994    Collective on MPI_Comm
1995 
1996    Input Parameters:
1997 +  A - the matrix
1998 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1999            (same value is used for all local rows)
2000 .  d_nnz - array containing the number of nonzeros in the various rows of the
2001            DIAGONAL portion of the local submatrix (possibly different for each row)
2002            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2003            The size of this array is equal to the number of local rows, i.e. 'm'.
2004            You must leave room for the diagonal entry even if it is zero.
2005 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2006            submatrix (same value is used for all local rows).
2007 -  o_nnz - array containing the number of nonzeros in the various rows of the
2008            OFF-DIAGONAL portion of the local submatrix (possibly different for
2009            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2010            structure. The size of this array is equal to the number
2011            of local rows, i.e. 'm'.
2012 
2013    The AIJ format (also called the Yale sparse matrix format or
2014    compressed row storage), is fully compatible with standard Fortran 77
2015    storage.  That is, the stored row and column indices can begin at
2016    either one (as in Fortran) or zero.  See the users manual for details.
2017 
2018    The user MUST specify either the local or global matrix dimensions
2019    (possibly both).
2020 
2021    The parallel matrix is partitioned such that the first m0 rows belong to
2022    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2023    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2024 
2025    The DIAGONAL portion of the local submatrix of a processor can be defined
2026    as the submatrix which is obtained by extracting the part corresponding
2027    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2028    first row that belongs to the processor, and r2 is the last row belonging
2029    to this processor. This is a square mxm matrix. The remaining portion
2030    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2031 
2032    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2033 
2034    By default, this format uses inodes (identical nodes) when possible.
2035    We search for consecutive rows with the same nonzero structure, thereby
2036    reusing matrix information to achieve increased efficiency.
2037 
2038    Options Database Keys:
2039 +  -mat_aij_no_inode  - Do not use inodes
2040 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2041 -  -mat_aij_oneindex - Internally use indexing starting at 1
2042         rather than 0.  Note that when calling MatSetValues(),
2043         the user still MUST index entries starting at 0!
2044 
2045    Example usage:
2046 
2047    Consider the following 8x8 matrix with 34 non-zero values, that is
2048    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2049    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2050    as follows:
2051 
2052 .vb
2053             1  2  0  |  0  3  0  |  0  4
2054     Proc0   0  5  6  |  7  0  0  |  8  0
2055             9  0 10  | 11  0  0  | 12  0
2056     -------------------------------------
2057            13  0 14  | 15 16 17  |  0  0
2058     Proc1   0 18  0  | 19 20 21  |  0  0
2059             0  0  0  | 22 23  0  | 24  0
2060     -------------------------------------
2061     Proc2  25 26 27  |  0  0 28  | 29  0
2062            30  0  0  | 31 32 33  |  0 34
2063 .ve
2064 
2065    This can be represented as a collection of submatrices as:
2066 
2067 .vb
2068       A B C
2069       D E F
2070       G H I
2071 .ve
2072 
2073    Where the submatrices A,B,C are owned by proc0, D,E,F are
2074    owned by proc1, G,H,I are owned by proc2.
2075 
2076    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2077    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2078    The 'M','N' parameters are 8,8, and have the same values on all procs.
2079 
2080    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2081    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2082    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2083    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2084    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2085    matrix, and [DF] as another SeqAIJ matrix.
2086 
2087    When d_nz, o_nz parameters are specified, d_nz storage elements are
2088    allocated for every row of the local diagonal submatrix, and o_nz
2089    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2090    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2091    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2092    In this case, the values of d_nz,o_nz are:
2093 .vb
2094      proc0 : d_nz = 2, o_nz = 2
2095      proc1 : d_nz = 3, o_nz = 2
2096      proc2 : d_nz = 1, o_nz = 4
2097 .ve
2098    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2099    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2100    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2101    34 values.
2102 
2103    When d_nnz, o_nnz parameters are specified, the storage is specified
2104    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2105    In the above case the values for d_nnz,o_nnz are:
2106 .vb
2107      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2108      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2109      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2110 .ve
2111    Here the space allocated is the sum of all the above values, i.e. 34, and
2112    hence the preallocation is exact.
2113 
2114    Level: intermediate
2115 
2116 .keywords: matrix, aij, compressed row, sparse, parallel
2117 
2118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2119 @*/
2120 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2121 {
2122   Mat_MPIAIJ   *b;
2123   int          ierr,i;
2124   PetscTruth   flg2;
2125 
2126   PetscFunctionBegin;
2127   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2128   if (!flg2) PetscFunctionReturn(0);
2129   B->preallocated = PETSC_TRUE;
2130   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
2131   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
2132   if (d_nnz) {
2133     for (i=0; i<B->m; i++) {
2134       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2135     }
2136   }
2137   if (o_nnz) {
2138     for (i=0; i<B->m; i++) {
2139       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2140     }
2141   }
2142   b = (Mat_MPIAIJ*)B->data;
2143 
2144   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2145   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2146   PLogObjectParent(B,b->A);
2147   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2148   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2149   PLogObjectParent(B,b->B);
2150 
2151   PetscFunctionReturn(0);
2152 }
2153 
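/*
   Usage sketch (illustrative only): the proc0 preallocation from the 8x8 example in
   the manual page above, using MatCreate()/MatSetType() followed by
   MatMPIAIJSetPreallocation().  Each process passes its own local sizes and its own
   d_nnz/o_nnz arrays; the scalar d_nz/o_nz arguments are ignored when the arrays
   are given.

     Mat A;
     int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/
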
2154 #undef __FUNC__
2155 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ"
2156 /*@C
2157    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2158    (the default parallel PETSc format).  For good matrix assembly performance
2159    the user should preallocate the matrix storage by setting the parameters
2160    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2161    performance can be increased by more than a factor of 50.
2162 
2163    Collective on MPI_Comm
2164 
2165    Input Parameters:
2166 +  comm - MPI communicator
2167 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2168            This value should be the same as the local size used in creating the
2169            y vector for the matrix-vector product y = Ax.
2170 .  n - This value should be the same as the local size used in creating the
2171        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2172        calculated if N is given) For square matrices n is almost always m.
2173 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2174 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2175 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2176            (same value is used for all local rows)
2177 .  d_nnz - array containing the number of nonzeros in the various rows of the
2178            DIAGONAL portion of the local submatrix (possibly different for each row)
2179            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2180            The size of this array is equal to the number of local rows, i.e. 'm'.
2181            You must leave room for the diagonal entry even if it is zero.
2182 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2183            submatrix (same value is used for all local rows).
2184 -  o_nnz - array containing the number of nonzeros in the various rows of the
2185            OFF-DIAGONAL portion of the local submatrix (possibly different for
2186            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2187            structure. The size of this array is equal to the number
2188            of local rows, i.e. 'm'.
2189 
2190    Output Parameter:
2191 .  A - the matrix
2192 
2193    Notes:
2194    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2195    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2196    storage requirements for this matrix.
2197 
2198    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2199    processor then it must be used on all processors that share the object for
2200    that argument.
2201 
2202    The AIJ format (also called the Yale sparse matrix format or
2203    compressed row storage), is fully compatible with standard Fortran 77
2204    storage.  That is, the stored row and column indices can begin at
2205    either one (as in Fortran) or zero.  See the users manual for details.
2206 
2207    The user MUST specify either the local or global matrix dimensions
2208    (possibly both).
2209 
2210    The parallel matrix is partitioned such that the first m0 rows belong to
2211    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2212    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2213 
2214    The DIAGONAL portion of the local submatrix of a processor can be defined
2215    as the submatrix which is obtained by extracting the part corresponding
2216    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2217    first row that belongs to the processor, and r2 is the last row belonging
2218    to this processor. This is a square mxm matrix. The remaining portion
2219    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2220 
2221    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2222 
2223    By default, this format uses inodes (identical nodes) when possible.
2224    We search for consecutive rows with the same nonzero structure, thereby
2225    reusing matrix information to achieve increased efficiency.
2226 
2227    Options Database Keys:
2228 +  -mat_aij_no_inode  - Do not use inodes
2229 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2230 -  -mat_aij_oneindex - Internally use indexing starting at 1
2231         rather than 0.  Note that when calling MatSetValues(),
2232         the user still MUST index entries starting at 0!
2233 
2234 
2235    Example usage:
2236 
2237    Consider the following 8x8 matrix with 34 non-zero values, that is
2238    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2239    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2240    as follows:
2241 
2242 .vb
2243             1  2  0  |  0  3  0  |  0  4
2244     Proc0   0  5  6  |  7  0  0  |  8  0
2245             9  0 10  | 11  0  0  | 12  0
2246     -------------------------------------
2247            13  0 14  | 15 16 17  |  0  0
2248     Proc1   0 18  0  | 19 20 21  |  0  0
2249             0  0  0  | 22 23  0  | 24  0
2250     -------------------------------------
2251     Proc2  25 26 27  |  0  0 28  | 29  0
2252            30  0  0  | 31 32 33  |  0 34
2253 .ve
2254 
2255    This can be represented as a collection of submatrices as:
2256 
2257 .vb
2258       A B C
2259       D E F
2260       G H I
2261 .ve
2262 
2263    Where the submatrices A,B,C are owned by proc0, D,E,F are
2264    owned by proc1, G,H,I are owned by proc2.
2265 
2266    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2267    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2268    The 'M','N' parameters are 8,8, and have the same values on all procs.
2269 
2270    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2271    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2272    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2273    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2274    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2275    matrix, and [DF] as another SeqAIJ matrix.
2276 
2277    When d_nz, o_nz parameters are specified, d_nz storage elements are
2278    allocated for every row of the local diagonal submatrix, and o_nz
2279    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2280    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2281    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2282    In this case, the values of d_nz,o_nz are:
2283 .vb
2284      proc0 : d_nz = 2, o_nz = 2
2285      proc1 : d_nz = 3, o_nz = 2
2286      proc2 : d_nz = 1, o_nz = 4
2287 .ve
2288    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2289    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2290    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2291    34 values.
2292 
2293    When d_nnz, o_nnz parameters are specified, the storage is specified
2294    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2295    In the above case the values for d_nnz,o_nnz are:
2296 .vb
2297      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2298      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2299      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2300 .ve
2301    Here the space allocated is the sum of all the above values, i.e. 34, and
2302    hence the preallocation is exact.
2303 
2304    Level: intermediate
2305 
2306 .keywords: matrix, aij, compressed row, sparse, parallel
2307 
2308 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2309 @*/
2310 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2311 {
2312   int ierr,size;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2316   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2317   if (size > 1) {
2318     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2319     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2320   } else {
2321     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2322     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2323   }
2324   PetscFunctionReturn(0);
2325 }
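
/*
   Usage sketch (illustrative only): creating the 8x8 example matrix from the manual
   page above in one call, with proc0's scalar estimates d_nz=2, o_nz=2, and then
   inserting proc0's first global row (nonzeros 1,2 in the DIAGONAL block and 3,4 in
   the OFF-DIAGONAL block).  Each process passes its own local sizes and estimates;
   the remaining locally owned rows are set the same way before assembly.

     Mat    A;
     int    ierr,row = 0,cols[4] = {0,1,4,7};
     Scalar vals[4] = {1.0,2.0,3.0,4.0};

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,4,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/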
2326