xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4d9d31ab498596cb7ef6b2cbe5750038ce732ce3)
1 /*$Id: mpiaij.c,v 1.337 2001/06/03 02:51:04 bsmith Exp buschelm $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
18 a slightly higher hash-table lookup cost; without it, it is not scalable (each
19 processor holds an order-N integer array) but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
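/*
   Illustrative sketch: once the colmap exists, a global column number gcol that lies
   in the off-diagonal part is translated to its local index lcol in B roughly as
   follows (this mirrors the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below; gcol, lcol and the surrounding declarations are hypothetical).
*/
#if 0
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                        /* entries are stored shifted by one; -1 now means "not found" */
#else
  lcol = aij->colmap[gcol] - 1;  /* stored as local index + 1; -1 means "not found" */
#endif
#endif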
42 
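/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B)
   SeqAIJ block: they locate the column within the row (binary search narrowed to a short
   linear scan), add to or overwrite an existing entry, otherwise shift the later entries
   of the row up to make room, and when the row is already full they reallocate the row
   storage with CHUNKSIZE extra slots.
*/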
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         Scalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         Scalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
197   Scalar     value;
198   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int        cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth roworiented = aij->roworiented;
201 
202   /* Some variables required in the macros */
203   Mat        A = aij->A;
204   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
205   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   Scalar     *aa = a->a;
207   PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat        B = aij->B;
209   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
210   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   Scalar     *ba = b->a;
212 
213   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int        nonew = a->nonew,shift = a->indexshift;
215   Scalar     *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
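/*
   Caller-side sketch (illustrative only; mat, ierr and the index values are hypothetical):
   values are passed with global row/column indices; entries in locally owned rows go
   directly into the diagonal (aij->A) or off-diagonal (aij->B) block above, while
   off-processor rows are stashed and exchanged during assembly.
*/
#if 0
  {
    int    row = 0,col = 0;                       /* hypothetical global indices */
    Scalar v   = 1.0;
    ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
#endif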
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are using the same mode, either INSERT_VALUES or ADD_VALUES */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   Scalar      *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled; if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if the nonzero structure of the submatrix B cannot change then we know that
384      no processor disassembled, so we can skip this step
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
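/*
   Overview of MatZeroRows_MPIAIJ(): each process first determines the owner of every
   requested global row, informs the other processes how many row numbers to expect
   (allreduce with PetscMaxSum_Op), exchanges the row numbers with nonblocking
   sends/receives, and finally zeros the rows it owns in its local A and B blocks,
   optionally placing *diag on the diagonal.
*/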
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry, which
533      would allocate new memory, causing lots of mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
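/*
   The parallel product y = A x below is computed as y = A_diag x_local + B_offdiag x_ghost;
   the scatter of the ghost values of x into a->lvec is overlapped with the multiply by
   the local diagonal block a->A.
*/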
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
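/*
   For the transpose products below, the off-diagonal block is applied first: B^T x is
   computed into a->lvec and scattered back (ADD_VALUES, reverse mode) to the owning
   processes while the local product with A^T proceeds; the VecScatterEnd() then
   accumulates the remote contributions into the result vector.
*/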
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for my implementation */
621   /* but may not always be true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in zz until the next line, which is true for my implementation */
642   /* but may not always be true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(Scalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     Scalar      *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Every processor has to make these calls to draw the matrix since the graphics
798        waits are synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 /*
832     This has to provide several versions:
833 
834      2) a) use only local smoothing, updating outer values only once.
835         b) local smoothing, updating outer values each inner iteration.
836      3) coloring, updating outer values between colors.
837 */
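/*
   Note on the local sweeps implemented below (assuming SPARSEDENSEMDOT(sum,x,v,idx,n),
   from the spops.h include, subtracts the sparse dot product from sum): each row update
   is the usual SOR step

     x_i <- (1-omega) x_i + omega (b_i - sum_{j!=i} a_ij x_j - sum_ghost b_ij xhat_j) / (a_ii + fshift)

   where xhat are the ghost values gathered into mat->lvec before the sweeps begin.
*/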
838 #undef __FUNCT__
839 #define __FUNCT__ "MatRelax_MPIAIJ"
840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
841 {
842   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
843   Mat        AA = mat->A,BB = mat->B;
844   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
845   Scalar     *b,*x,*xs,*ls,d,*v,sum;
846   int        ierr,*idx,*diag;
847   int        n = matin->n,m = matin->m,i,shift = A->indexshift;
848 
849   PetscFunctionBegin;
850   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
851   diag = A->diag;
852   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
853     if (flag & SOR_ZERO_INITIAL_GUESS) {
854       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
855       PetscFunctionReturn(0);
856     }
857     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
858     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
859     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
860     if (xx != bb) {
861       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
862     } else {
863       b = x;
864     }
865     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
866     xs = x + shift; /* shift by one for index start of 1 */
867     ls = ls + shift;
868     while (its--) {
869       /* go down through the rows */
870       for (i=0; i<m; i++) {
871         n    = A->i[i+1] - A->i[i];
872 	PetscLogFlops(4*n+3);
873         idx  = A->j + A->i[i] + shift;
874         v    = A->a + A->i[i] + shift;
875         sum  = b[i];
876         SPARSEDENSEMDOT(sum,xs,v,idx,n);
877         d    = fshift + A->a[diag[i]+shift];
878         n    = B->i[i+1] - B->i[i];
879         idx  = B->j + B->i[i] + shift;
880         v    = B->a + B->i[i] + shift;
881         SPARSEDENSEMDOT(sum,ls,v,idx,n);
882         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
883       }
884       /* come up through the rows */
885       for (i=m-1; i>-1; i--) {
886         n    = A->i[i+1] - A->i[i];
887 	PetscLogFlops(4*n+3);
888         idx  = A->j + A->i[i] + shift;
889         v    = A->a + A->i[i] + shift;
890         sum  = b[i];
891         SPARSEDENSEMDOT(sum,xs,v,idx,n);
892         d    = fshift + A->a[diag[i]+shift];
893         n    = B->i[i+1] - B->i[i];
894         idx  = B->j + B->i[i] + shift;
895         v    = B->a + B->i[i] + shift;
896         SPARSEDENSEMDOT(sum,ls,v,idx,n);
897         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
898       }
899     }
900     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
902     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
903   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
904     if (flag & SOR_ZERO_INITIAL_GUESS) {
905       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
906       PetscFunctionReturn(0);
907     }
908     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
909     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
910     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
911     if (xx != bb) {
912       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
913     } else {
914       b = x;
915     }
916     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
917     xs = x + shift; /* shift by one for index start of 1 */
918     ls = ls + shift;
919     while (its--) {
920       for (i=0; i<m; i++) {
921         n    = A->i[i+1] - A->i[i];
922 	PetscLogFlops(4*n+3);
923         idx  = A->j + A->i[i] + shift;
924         v    = A->a + A->i[i] + shift;
925         sum  = b[i];
926         SPARSEDENSEMDOT(sum,xs,v,idx,n);
927         d    = fshift + A->a[diag[i]+shift];
928         n    = B->i[i+1] - B->i[i];
929         idx  = B->j + B->i[i] + shift;
930         v    = B->a + B->i[i] + shift;
931         SPARSEDENSEMDOT(sum,ls,v,idx,n);
932         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
933       }
934     }
935     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
936     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
937     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
938   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
939     if (flag & SOR_ZERO_INITIAL_GUESS) {
940       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
941       PetscFunctionReturn(0);
942     }
943     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
944     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
945     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
946     if (xx != bb) {
947       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
948     } else {
949       b = x;
950     }
951     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
952     xs = x + shift; /* shift by one for index start of 1 */
953     ls = ls + shift;
954     while (its--) {
955       for (i=m-1; i>-1; i--) {
956         n    = A->i[i+1] - A->i[i];
957 	PetscLogFlops(4*n+3);
958         idx  = A->j + A->i[i] + shift;
959         v    = A->a + A->i[i] + shift;
960         sum  = b[i];
961         SPARSEDENSEMDOT(sum,xs,v,idx,n);
962         d    = fshift + A->a[diag[i]+shift];
963         n    = B->i[i+1] - B->i[i];
964         idx  = B->j + B->i[i] + shift;
965         v    = B->a + B->i[i] + shift;
966         SPARSEDENSEMDOT(sum,ls,v,idx,n);
967         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
968       }
969     }
970     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
972     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
973   } else {
974     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatGetInfo_MPIAIJ"
981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
982 {
983   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
984   Mat        A = mat->A,B = mat->B;
985   int        ierr;
986   PetscReal  isend[5],irecv[5];
987 
988   PetscFunctionBegin;
989   info->block_size     = 1.0;
990   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
991   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
992   isend[3] = info->memory;  isend[4] = info->mallocs;
993   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
994   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
995   isend[3] += info->memory;  isend[4] += info->mallocs;
996   if (flag == MAT_LOCAL) {
997     info->nz_used      = isend[0];
998     info->nz_allocated = isend[1];
999     info->nz_unneeded  = isend[2];
1000     info->memory       = isend[3];
1001     info->mallocs      = isend[4];
1002   } else if (flag == MAT_GLOBAL_MAX) {
1003     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1004     info->nz_used      = irecv[0];
1005     info->nz_allocated = irecv[1];
1006     info->nz_unneeded  = irecv[2];
1007     info->memory       = irecv[3];
1008     info->mallocs      = irecv[4];
1009   } else if (flag == MAT_GLOBAL_SUM) {
1010     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1011     info->nz_used      = irecv[0];
1012     info->nz_allocated = irecv[1];
1013     info->nz_unneeded  = irecv[2];
1014     info->memory       = irecv[3];
1015     info->mallocs      = irecv[4];
1016   }
1017   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1018   info->fill_ratio_needed = 0;
1019   info->factor_mallocs    = 0;
1020   info->rows_global       = (double)matin->M;
1021   info->columns_global    = (double)matin->N;
1022   info->rows_local        = (double)matin->m;
1023   info->columns_local     = (double)matin->N;
1024 
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatSetOption_MPIAIJ"
1030 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1033   int        ierr;
1034 
1035   PetscFunctionBegin;
1036   switch (op) {
1037   case MAT_NO_NEW_NONZERO_LOCATIONS:
1038   case MAT_YES_NEW_NONZERO_LOCATIONS:
1039   case MAT_COLUMNS_UNSORTED:
1040   case MAT_COLUMNS_SORTED:
1041   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1042   case MAT_KEEP_ZEROED_ROWS:
1043   case MAT_NEW_NONZERO_LOCATION_ERR:
1044   case MAT_USE_INODES:
1045   case MAT_DO_NOT_USE_INODES:
1046   case MAT_IGNORE_ZERO_ENTRIES:
1047     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1048     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1049     break;
1050   case MAT_ROW_ORIENTED:
1051     a->roworiented = PETSC_TRUE;
1052     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1053     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1054     break;
1055   case MAT_ROWS_SORTED:
1056   case MAT_ROWS_UNSORTED:
1057   case MAT_YES_NEW_DIAGONALS:
1058     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1059     break;
1060   case MAT_COLUMN_ORIENTED:
1061     a->roworiented = PETSC_FALSE;
1062     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1063     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1064     break;
1065   case MAT_IGNORE_OFF_PROC_ENTRIES:
1066     a->donotstash = PETSC_TRUE;
1067     break;
1068   case MAT_NO_NEW_DIAGONALS:
1069     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1070     break;
1071   default:
1072     SETERRQ(PETSC_ERR_SUP,"unknown option");
1073     break;
1074   }
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatGetOwnershipRange_MPIAIJ"
1080 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1081 {
1082   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1083 
1084   PetscFunctionBegin;
1085   if (m) *m = mat->rstart;
1086   if (n) *n = mat->rend;
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatGetRow_MPIAIJ"
1092 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1093 {
1094   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1095   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1096   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1097   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1098   int        *cmap,*idx_p;
1099 
1100   PetscFunctionBegin;
1101   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1102   mat->getrowactive = PETSC_TRUE;
1103 
1104   if (!mat->rowvalues && (idx || v)) {
1105     /*
1106         allocate enough space to hold information from the longest row.
1107     */
1108     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1109     int     max = 1,tmp;
1110     for (i=0; i<matin->m; i++) {
1111       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1112       if (max < tmp) { max = tmp; }
1113     }
1114     ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr);
1115     mat->rowindices = (int*)(mat->rowvalues + max);
1116   }
1117 
1118   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1119   lrow = row - rstart;
1120 
1121   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1122   if (!v)   {pvA = 0; pvB = 0;}
1123   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1124   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1125   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1126   nztot = nzA + nzB;
1127 
1128   cmap  = mat->garray;
1129   if (v  || idx) {
1130     if (nztot) {
1131       /* Sort by increasing column numbers, assuming A and B already sorted */
1132       int imark = -1;
1133       if (v) {
1134         *v = v_p = mat->rowvalues;
1135         for (i=0; i<nzB; i++) {
1136           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1137           else break;
1138         }
1139         imark = i;
1140         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1141         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1142       }
1143       if (idx) {
1144         *idx = idx_p = mat->rowindices;
1145         if (imark > -1) {
1146           for (i=0; i<imark; i++) {
1147             idx_p[i] = cmap[cworkB[i]];
1148           }
1149         } else {
1150           for (i=0; i<nzB; i++) {
1151             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1152             else break;
1153           }
1154           imark = i;
1155         }
1156         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1157         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1158       }
1159     } else {
1160       if (idx) *idx = 0;
1161       if (v)   *v   = 0;
1162     }
1163   }
1164   *nz = nztot;
1165   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1166   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1172 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1173 {
1174   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1175 
1176   PetscFunctionBegin;
1177   if (aij->getrowactive == PETSC_FALSE) {
1178     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1179   }
1180   aij->getrowactive = PETSC_FALSE;
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatNorm_MPIAIJ"
1186 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1187 {
1188   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1189   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1190   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1191   PetscReal  sum = 0.0;
1192   Scalar     *v;
1193 
1194   PetscFunctionBegin;
1195   if (aij->size == 1) {
1196     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1197   } else {
1198     if (type == NORM_FROBENIUS) {
1199       v = amat->a;
1200       for (i=0; i<amat->nz; i++) {
1201 #if defined(PETSC_USE_COMPLEX)
1202         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1203 #else
1204         sum += (*v)*(*v); v++;
1205 #endif
1206       }
1207       v = bmat->a;
1208       for (i=0; i<bmat->nz; i++) {
1209 #if defined(PETSC_USE_COMPLEX)
1210         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1211 #else
1212         sum += (*v)*(*v); v++;
1213 #endif
1214       }
1215       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1216       *norm = sqrt(*norm);
1217     } else if (type == NORM_1) { /* max column norm */
1218       PetscReal *tmp,*tmp2;
1219       int    *jj,*garray = aij->garray;
1220       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1221       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1222       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1223       *norm = 0.0;
1224       v = amat->a; jj = amat->j;
1225       for (j=0; j<amat->nz; j++) {
1226         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1227       }
1228       v = bmat->a; jj = bmat->j;
1229       for (j=0; j<bmat->nz; j++) {
1230         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1231       }
1232       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1233       for (j=0; j<mat->N; j++) {
1234         if (tmp2[j] > *norm) *norm = tmp2[j];
1235       }
1236       ierr = PetscFree(tmp);CHKERRQ(ierr);
1237       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1238     } else if (type == NORM_INFINITY) { /* max row norm */
1239       PetscReal ntemp = 0.0;
1240       for (j=0; j<aij->A->m; j++) {
1241         v = amat->a + amat->i[j] + shift;
1242         sum = 0.0;
1243         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1244           sum += PetscAbsScalar(*v); v++;
1245         }
1246         v = bmat->a + bmat->i[j] + shift;
1247         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1248           sum += PetscAbsScalar(*v); v++;
1249         }
1250         if (sum > ntemp) ntemp = sum;
1251       }
1252       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1253     } else {
1254       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPIAIJ"
1262 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1263 {
1264   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1265   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1266   int        ierr,shift = Aloc->indexshift;
1267   int        M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1268   Mat        B;
1269   Scalar     *array;
1270 
1271   PetscFunctionBegin;
1272   if (!matout && M != N) {
1273     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1274   }
1275 
1276   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1277 
1278   /* copy over the A part */
1279   Aloc = (Mat_SeqAIJ*)a->A->data;
1280   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1281   row = a->rstart;
1282   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1283   for (i=0; i<m; i++) {
1284     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1285     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1286   }
1287   aj = Aloc->j;
1288   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1289 
1290   /* copy over the B part */
1291   Aloc = (Mat_SeqAIJ*)a->B->data;
1292   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1293   row  = a->rstart;
1294   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1295   ct   = cols;
1296   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1297   for (i=0; i<m; i++) {
1298     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1299     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1300   }
1301   ierr = PetscFree(ct);CHKERRQ(ierr);
1302   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1303   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1304   if (matout) {
1305     *matout = B;
1306   } else {
1307     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1308   }
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1314 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1315 {
1316   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1317   Mat        a = aij->A,b = aij->B;
1318   int        ierr,s1,s2,s3;
1319 
1320   PetscFunctionBegin;
1321   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1322   if (rr) {
1323     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1324     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1325     /* Overlap communication with computation. */
1326     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1327   }
1328   if (ll) {
1329     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1330     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1331     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1332   }
1333   /* scale  the diagonal block */
1334   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1335 
1336   if (rr) {
1337     /* Do a scatter end and then right scale the off-diagonal block */
1338     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1339     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1340   }
1341 
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1348 int MatPrintHelp_MPIAIJ(Mat A)
1349 {
1350   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1351   int        ierr;
1352 
1353   PetscFunctionBegin;
1354   if (!a->rank) {
1355     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1362 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1363 {
1364   PetscFunctionBegin;
1365   *bs = 1;
1366   PetscFunctionReturn(0);
1367 }
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1370 int MatSetUnfactored_MPIAIJ(Mat A)
1371 {
1372   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1373   int        ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatEqual_MPIAIJ"
1382 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1383 {
1384   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1385   Mat        a,b,c,d;
1386   PetscTruth flg;
1387   int        ierr;
1388 
1389   PetscFunctionBegin;
1390   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1391   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1392   a = matA->A; b = matA->B;
1393   c = matB->A; d = matB->B;
1394 
1395   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1396   if (flg == PETSC_TRUE) {
1397     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1398   }
1399   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatCopy_MPIAIJ"
1405 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1406 {
1407   int        ierr;
1408   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1409   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1410   PetscTruth flg;
1411 
1412   PetscFunctionBegin;
1413   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1414   if (str != SAME_NONZERO_PATTERN || !flg) {
1415     /* Because of the column compression in the off-processor part of the matrix a->B,
1416        the number of columns in a->B and b->B may be different; hence we cannot call
1417        MatCopy() directly on the two parts. If need be, we could provide a copy more
1418        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1419        then copying the submatrices. */
1420     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1421   } else {
1422     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1423     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1424   }
1425   PetscFunctionReturn(0);
1426 }
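
/*
   Editor's sketch (not compiled): use of MatCopy() on MPIAIJ matrices.  With SAME_NONZERO_PATTERN
   the routine above copies the diagonal and off-diagonal blocks directly; with any other structure
   flag (or a different matrix type for B) it falls back to MatCopy_Basic().  Names are hypothetical.
*/
#if 0
static int CopyExample(Mat A,Mat B)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      /* fast block-by-block path */
  ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* general MatCopy_Basic() path */
  PetscFunctionReturn(0);
}
#endif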
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1430 int MatSetUpPreallocation_MPIAIJ(Mat A)
1431 {
1432   int        ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1440 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1441 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1442 EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1443 EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1444 #if !defined(PETSC_USE_COMPLEX)
1445 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1446 #endif
1447 
1448 /* -------------------------------------------------------------------*/
1449 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1450        MatGetRow_MPIAIJ,
1451        MatRestoreRow_MPIAIJ,
1452        MatMult_MPIAIJ,
1453        MatMultAdd_MPIAIJ,
1454        MatMultTranspose_MPIAIJ,
1455        MatMultTransposeAdd_MPIAIJ,
1456        0,
1457        0,
1458        0,
1459        0,
1460        0,
1461        0,
1462        MatRelax_MPIAIJ,
1463        MatTranspose_MPIAIJ,
1464        MatGetInfo_MPIAIJ,
1465        MatEqual_MPIAIJ,
1466        MatGetDiagonal_MPIAIJ,
1467        MatDiagonalScale_MPIAIJ,
1468        MatNorm_MPIAIJ,
1469        MatAssemblyBegin_MPIAIJ,
1470        MatAssemblyEnd_MPIAIJ,
1471        0,
1472        MatSetOption_MPIAIJ,
1473        MatZeroEntries_MPIAIJ,
1474        MatZeroRows_MPIAIJ,
1475 #if !defined(PETSC_USE_COMPLEX)
1476        MatLUFactorSymbolic_MPIAIJ_TFS,
1477 #else
1478        0,
1479 #endif
1480        0,
1481        0,
1482        0,
1483        MatSetUpPreallocation_MPIAIJ,
1484        0,
1485        MatGetOwnershipRange_MPIAIJ,
1486        0,
1487        0,
1488        0,
1489        0,
1490        MatDuplicate_MPIAIJ,
1491        0,
1492        0,
1493        0,
1494        0,
1495        0,
1496        MatGetSubMatrices_MPIAIJ,
1497        MatIncreaseOverlap_MPIAIJ,
1498        MatGetValues_MPIAIJ,
1499        MatCopy_MPIAIJ,
1500        MatPrintHelp_MPIAIJ,
1501        MatScale_MPIAIJ,
1502        0,
1503        0,
1504        0,
1505        MatGetBlockSize_MPIAIJ,
1506        0,
1507        0,
1508        0,
1509        0,
1510        MatFDColoringCreate_MPIAIJ,
1511        0,
1512        MatSetUnfactored_MPIAIJ,
1513        0,
1514        0,
1515        MatGetSubMatrix_MPIAIJ,
1516        MatDestroy_MPIAIJ,
1517        MatView_MPIAIJ,
1518        MatGetMaps_Petsc,
1519        0,
1520        0,
1521        0,
1522        0,
1523        0,
1524        0,
1525        0,
1526        0,
1527        MatSetColoring_MPIAIJ,
1528        MatSetValuesAdic_MPIAIJ,
1529        MatSetValuesAdifor_MPIAIJ
1530 };
1531 
1532 /* ----------------------------------------------------------------------------------------*/
1533 
1534 EXTERN_C_BEGIN
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1537 int MatStoreValues_MPIAIJ(Mat mat)
1538 {
1539   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1540   int        ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1544   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 EXTERN_C_END
1548 
1549 EXTERN_C_BEGIN
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1552 int MatRetrieveValues_MPIAIJ(Mat mat)
1553 {
1554   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1555   int        ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1559   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 EXTERN_C_END
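
/*
   Editor's sketch (not compiled): the store/retrieve cycle provided by the two routines above.
   MatStoreValues() stashes the numerical values of a matrix whose nonzero pattern is frozen so
   they can be restored later with MatRetrieveValues(), e.g. after a setup phase has overwritten
   them.  The helper name is hypothetical, and MAT_NO_NEW_NONZERO_LOCATIONS is assumed to be the
   option that freezes the nonzero pattern in this version of the library.
*/
#if 0
static int StoreRetrieveExample(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* freeze the nonzero pattern */
  ierr = MatStoreValues(A);CHKERRQ(ierr);                            /* stash the current values */
  /* ... the values of A are overwritten here ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);                         /* restore the stashed values */
  PetscFunctionReturn(0);
}
#endif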
1563 
1564 #include "petscpc.h"
1565 EXTERN_C_BEGIN
1566 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1567 EXTERN_C_END
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatCreate_MPIAIJ"
1572 int MatCreate_MPIAIJ(Mat B)
1573 {
1574   Mat_MPIAIJ   *b;
1575   int          ierr,i,size;
1576 
1577   PetscFunctionBegin;
1578   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1579 
1580   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1581   B->data         = (void*)b;
1582   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1583   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1584   B->factor       = 0;
1585   B->assembled    = PETSC_FALSE;
1586   B->mapping      = 0;
1587 
1588   B->insertmode      = NOT_SET_VALUES;
1589   b->size            = size;
1590   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1591 
1592   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1593   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1594 
1595   /* the information in the maps duplicates the information computed below; eventually
1596      we should remove the duplicate information that is not contained in the maps */
1597   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1598   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1599 
1600   /* build local table of row and column ownerships */
1601   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1602   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1603   b->cowners = b->rowners + b->size + 2;
1604   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1605   b->rowners[0] = 0;
1606   for (i=2; i<=b->size; i++) {
1607     b->rowners[i] += b->rowners[i-1];
1608   }
1609   b->rstart = b->rowners[b->rank];
1610   b->rend   = b->rowners[b->rank+1];
1611   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1612   b->cowners[0] = 0;
1613   for (i=2; i<=b->size; i++) {
1614     b->cowners[i] += b->cowners[i-1];
1615   }
1616   b->cstart = b->cowners[b->rank];
1617   b->cend   = b->cowners[b->rank+1];
1618 
1619   /* build cache (stash) for values destined for other processes during assembly */
1620   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1621   b->donotstash  = PETSC_FALSE;
1622   b->colmap      = 0;
1623   b->garray      = 0;
1624   b->roworiented = PETSC_TRUE;
1625 
1626   /* stuff used for matrix vector multiply */
1627   b->lvec      = PETSC_NULL;
1628   b->Mvctx     = PETSC_NULL;
1629 
1630   /* stuff for MatGetRow() */
1631   b->rowindices   = 0;
1632   b->rowvalues    = 0;
1633   b->getrowactive = PETSC_FALSE;
1634 
1635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1636                                      "MatStoreValues_MPIAIJ",
1637                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1639                                      "MatRetrieveValues_MPIAIJ",
1640                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1642                                      "MatGetDiagonalBlock_MPIAIJ",
1643                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 EXTERN_C_END
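
/*
   Editor's sketch (not compiled) of the ownership-table pattern used in MatCreate_MPIAIJ() above:
   every process contributes its local row count via MPI_Allgather(), and a running sum turns the
   counts into offsets so that owners[rank]..owners[rank+1] is the row range of each process.
   The helper and variable names are hypothetical.
*/
#if 0
static int BuildOwnershipExample(MPI_Comm comm,int mlocal,int *rstart,int *rend)
{
  int ierr,size,rank,i,*owners;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&owners);CHKERRQ(ierr);
  /* owners[1..size] receives the local row counts of processes 0..size-1 */
  ierr = MPI_Allgather(&mlocal,1,MPI_INT,owners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  owners[0] = 0;
  /* running sum: owners[i] becomes the global index of the first row of process i */
  for (i=2; i<=size; i++) owners[i] += owners[i-1];
  *rstart = owners[rank];
  *rend   = owners[rank+1];
  ierr = PetscFree(owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif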
1647 
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1650 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1651 {
1652   Mat        mat;
1653   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1654   int        ierr;
1655 
1656   PetscFunctionBegin;
1657   *newmat       = 0;
1658   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1659   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1660   a    = (Mat_MPIAIJ*)mat->data;
1661   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1662   mat->factor       = matin->factor;
1663   mat->assembled    = PETSC_TRUE;
1664   mat->insertmode   = NOT_SET_VALUES;
1665   mat->preallocated = PETSC_TRUE;
1666 
1667   a->rstart       = oldmat->rstart;
1668   a->rend         = oldmat->rend;
1669   a->cstart       = oldmat->cstart;
1670   a->cend         = oldmat->cend;
1671   a->size         = oldmat->size;
1672   a->rank         = oldmat->rank;
1673   a->donotstash   = oldmat->donotstash;
1674   a->roworiented  = oldmat->roworiented;
1675   a->rowindices   = 0;
1676   a->rowvalues    = 0;
1677   a->getrowactive = PETSC_FALSE;
1678 
1679   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1680   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1681   if (oldmat->colmap) {
1682 #if defined (PETSC_USE_CTABLE)
1683     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1684 #else
1685     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1686     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1687     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1688 #endif
1689   } else a->colmap = 0;
1690   if (oldmat->garray) {
1691     int len;
1692     len  = oldmat->B->n;
1693     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1694     PetscLogObjectMemory(mat,len*sizeof(int));
1695     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1696   } else a->garray = 0;
1697 
1698   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1699   PetscLogObjectParent(mat,a->lvec);
1700   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1701   PetscLogObjectParent(mat,a->Mvctx);
1702   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1703   PetscLogObjectParent(mat,a->A);
1704   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1705   PetscLogObjectParent(mat,a->B);
1706   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1707   *newmat = mat;
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 #include "petscsys.h"
1712 
1713 EXTERN_C_BEGIN
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatLoad_MPIAIJ"
1716 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1717 {
1718   Mat          A;
1719   Scalar       *vals,*svals;
1720   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1721   MPI_Status   status;
1722   int          i,nz,ierr,j,rstart,rend,fd;
1723   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1724   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1725   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1726 
1727   PetscFunctionBegin;
1728   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1729   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1730   if (!rank) {
1731     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1732     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1733     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1734     if (header[3] < 0) {
1735       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1736     }
1737   }
1738 
1739   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1740   M = header[1]; N = header[2];
1741   /* determine ownership of all rows */
1742   m = M/size + ((M % size) > rank);
1743   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1744   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1745   rowners[0] = 0;
1746   for (i=2; i<=size; i++) {
1747     rowners[i] += rowners[i-1];
1748   }
1749   rstart = rowners[rank];
1750   rend   = rowners[rank+1];
1751 
1752   /* distribute row lengths to all processors */
1753   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1754   offlens = ourlens + (rend-rstart);
1755   if (!rank) {
1756     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1757     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1758     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1759     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1760     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1761     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1762   } else {
1763     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1764   }
1765 
1766   if (!rank) {
1767     /* calculate the number of nonzeros on each processor */
1768     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1769     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1770     for (i=0; i<size; i++) {
1771       for (j=rowners[i]; j< rowners[i+1]; j++) {
1772         procsnz[i] += rowlengths[j];
1773       }
1774     }
1775     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1776 
1777     /* determine max buffer needed and allocate it */
1778     maxnz = 0;
1779     for (i=0; i<size; i++) {
1780       maxnz = PetscMax(maxnz,procsnz[i]);
1781     }
1782     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1783 
1784     /* read in my part of the matrix column indices  */
1785     nz   = procsnz[0];
1786     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1787     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1788 
1789     /* read in everyone else's part and ship it off */
1790     for (i=1; i<size; i++) {
1791       nz   = procsnz[i];
1792       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1793       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1794     }
1795     ierr = PetscFree(cols);CHKERRQ(ierr);
1796   } else {
1797     /* determine buffer space needed for message */
1798     nz = 0;
1799     for (i=0; i<m; i++) {
1800       nz += ourlens[i];
1801     }
1802     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1803 
1804     /* receive message of column indices */
1805     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1806     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1807     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1808   }
1809 
1810   /* determine column ownership if matrix is not square */
1811   if (N != M) {
1812     n      = N/size + ((N % size) > rank);
1813     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1814     cstart = cend - n;
1815   } else {
1816     cstart = rstart;
1817     cend   = rend;
1818     n      = cend - cstart;
1819   }
1820 
1821   /* loop over local rows, determining number of off diagonal entries */
1822   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1823   jj = 0;
1824   for (i=0; i<m; i++) {
1825     for (j=0; j<ourlens[i]; j++) {
1826       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1827       jj++;
1828     }
1829   }
1830 
1831   /* create our matrix */
1832   for (i=0; i<m; i++) {
1833     ourlens[i] -= offlens[i];
1834   }
1835   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1836   A = *newmat;
1837   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1838   for (i=0; i<m; i++) {
1839     ourlens[i] += offlens[i];
1840   }
1841 
1842   if (!rank) {
1843     ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1844 
1845     /* read in my part of the matrix numerical values  */
1846     nz   = procsnz[0];
1847     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1848 
1849     /* insert into matrix */
1850     jj      = rstart;
1851     smycols = mycols;
1852     svals   = vals;
1853     for (i=0; i<m; i++) {
1854       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1855       smycols += ourlens[i];
1856       svals   += ourlens[i];
1857       jj++;
1858     }
1859 
1860     /* read in the other processors' parts and ship them out */
1861     for (i=1; i<size; i++) {
1862       nz   = procsnz[i];
1863       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1864       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1865     }
1866     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1867   } else {
1868     /* receive numeric values */
1869     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr);
1870 
1871     /* receive message of values */
1872     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1873     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1874     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1875 
1876     /* insert into matrix */
1877     jj      = rstart;
1878     smycols = mycols;
1879     svals   = vals;
1880     for (i=0; i<m; i++) {
1881       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1882       smycols += ourlens[i];
1883       svals   += ourlens[i];
1884       jj++;
1885     }
1886   }
1887   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1888   ierr = PetscFree(vals);CHKERRQ(ierr);
1889   ierr = PetscFree(mycols);CHKERRQ(ierr);
1890   ierr = PetscFree(rowners);CHKERRQ(ierr);
1891 
1892   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1893   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 EXTERN_C_END
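
/*
   Editor's sketch (not compiled): loading a matrix through the routine above.  The binary file is
   opened with a viewer on the same communicator and handed to MatLoad() with the MATMPIAIJ type.
   The helper name is hypothetical, and PETSC_BINARY_RDONLY is assumed to be the read-only mode
   constant of this version of the library.
*/
#if 0
static int LoadExample(MPI_Comm comm,const char *file,Mat *A)
{
  PetscViewer viewer;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,file,PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr);  /* dispatches to MatLoad_MPIAIJ() above */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif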
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1900 /*
1901     Not great since it makes two copies of the submatrix: first a SeqAIJ
1902   copy on each process, and then the end result obtained by concatenating the
1903   local matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1904 */
1905 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1906 {
1907   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1908   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1909   Mat        *local,M,Mreuse;
1910   Scalar     *vwork,*aa;
1911   MPI_Comm   comm = mat->comm;
1912   Mat_SeqAIJ *aij;
1913 
1914 
1915   PetscFunctionBegin;
1916   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1918 
1919   if (call ==  MAT_REUSE_MATRIX) {
1920     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1921     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1922     local = &Mreuse;
1923     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1924   } else {
1925     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1926     Mreuse = *local;
1927     ierr = PetscFree(local);CHKERRQ(ierr);
1928   }
1929 
1930   /*
1931       m - number of local rows
1932       n - number of columns (same on all processors)
1933       rstart - first row in new global matrix generated
1934   */
1935   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1936   if (call == MAT_INITIAL_MATRIX) {
1937     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1938     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1939     ii  = aij->i;
1940     jj  = aij->j;
1941 
1942     /*
1943         Determine the number of non-zeros in the diagonal and off-diagonal
1944         portions of the matrix in order to do correct preallocation
1945     */
1946 
1947     /* first get start and end of "diagonal" columns */
1948     if (csize == PETSC_DECIDE) {
1949       nlocal = n/size + ((n % size) > rank);
1950     } else {
1951       nlocal = csize;
1952     }
1953     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1954     rstart = rend - nlocal;
1955     if (rank == size - 1 && rend != n) {
1956       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1957     }
1958 
1959     /* next, compute all the lengths */
1960     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1961     olens = dlens + m;
1962     for (i=0; i<m; i++) {
1963       jend = ii[i+1] - ii[i];
1964       olen = 0;
1965       dlen = 0;
1966       for (j=0; j<jend; j++) {
1967         if (*jj < rstart || *jj >= rend) olen++;
1968         else dlen++;
1969         jj++;
1970       }
1971       olens[i] = olen;
1972       dlens[i] = dlen;
1973     }
1974     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1975     ierr = PetscFree(dlens);CHKERRQ(ierr);
1976   } else {
1977     int ml,nl;
1978 
1979     M = *newmat;
1980     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1981     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1982     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1983     /*
1984          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1985        rather than the slower MatSetValues().
1986     */
1987     M->was_assembled = PETSC_TRUE;
1988     M->assembled     = PETSC_FALSE;
1989   }
1990   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1991   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1992   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1993   ii  = aij->i;
1994   jj  = aij->j;
1995   aa  = aij->a;
1996   for (i=0; i<m; i++) {
1997     row   = rstart + i;
1998     nz    = ii[i+1] - ii[i];
1999     cwork = jj;     jj += nz;
2000     vwork = aa;     aa += nz;
2001     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2002   }
2003 
2004   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006   *newmat = M;
2007 
2008   /* save the submatrix used on this process for the next reuse request */
2009   if (call ==  MAT_INITIAL_MATRIX) {
2010     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2011     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2012   }
2013 
2014   PetscFunctionReturn(0);
2015 }
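
/*
   Editor's sketch (not compiled): calling MatGetSubMatrix() on an MPIAIJ matrix.  Each process
   supplies index sets of the global rows and columns it wants in the new matrix, PETSC_DECIDE for
   csize lets the routine above choose the local column layout, and a second call with
   MAT_REUSE_MATRIX refills the "SubMatrix" object composed above instead of rebuilding it.
   The helper name and the particular index sets are hypothetical.
*/
#if 0
static int GetSubMatrixExample(Mat mat)
{
  IS  isrow,iscol;
  Mat sub;
  int ierr,rstart,rend,M,N;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
  /* keep the locally owned rows and the even-numbered global columns */
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,(N+1)/2,0,2,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... change the values (not the structure) of mat, then refill sub in place ... */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  ierr = MatDestroy(sub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif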
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2019 /*@C
2020    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2021    (the default parallel PETSc format).  For good matrix assembly performance
2022    the user should preallocate the matrix storage by setting the parameters
2023    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2024    performance can be increased by more than a factor of 50.
2025 
2026    Collective on MPI_Comm
2027 
2028    Input Parameters:
2029 +  A - the matrix
2030 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2031            (same value is used for all local rows)
2032 .  d_nnz - array containing the number of nonzeros in the various rows of the
2033            DIAGONAL portion of the local submatrix (possibly different for each row)
2034            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2035            The size of this array is equal to the number of local rows, i.e., 'm'.
2036            You must leave room for the diagonal entry even if it is zero.
2037 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2038            submatrix (same value is used for all local rows).
2039 -  o_nnz - array containing the number of nonzeros in the various rows of the
2040            OFF-DIAGONAL portion of the local submatrix (possibly different for
2041            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2042            structure. The size of this array is equal to the number
2043            of local rows, i.e., 'm'.
2044 
2045    The AIJ format (also called the Yale sparse matrix format or
2046    compressed row storage) is fully compatible with standard Fortran 77
2047    storage.  That is, the stored row and column indices can begin at
2048    either one (as in Fortran) or zero.  See the users manual for details.
2049 
2050    The user MUST specify either the local or global matrix dimensions
2051    (possibly both).
2052 
2053    The parallel matrix is partitioned such that the first m0 rows belong to
2054    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2055    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
2056 
2057    The DIAGONAL portion of the local submatrix of a processor can be defined
2058    as the submatrix obtained by extracting the part corresponding
2059    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2060    first row that belongs to the processor, and r2 is the last row belonging
2061    to this processor. This is a square mxm matrix. The remaining portion
2062    of the local submatrix (of size m x (N-m)) constitutes the OFF-DIAGONAL portion.
2063 
2064    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2065 
2066    By default, this format uses inodes (identical nodes) when possible.
2067    We search for consecutive rows with the same nonzero structure, thereby
2068    reusing matrix information to achieve increased efficiency.
2069 
2070    Options Database Keys:
2071 +  -mat_aij_no_inode  - Do not use inodes
2072 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2073 -  -mat_aij_oneindex - Internally use indexing starting at 1
2074         rather than 0.  Note that when calling MatSetValues(),
2075         the user still MUST index entries starting at 0!
2076 
2077    Example usage:
2078 
2079    Consider the following 8x8 matrix with 34 non-zero values, that is
2080    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2081    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2082    as follows:
2083 
2084 .vb
2085             1  2  0  |  0  3  0  |  0  4
2086     Proc0   0  5  6  |  7  0  0  |  8  0
2087             9  0 10  | 11  0  0  | 12  0
2088     -------------------------------------
2089            13  0 14  | 15 16 17  |  0  0
2090     Proc1   0 18  0  | 19 20 21  |  0  0
2091             0  0  0  | 22 23  0  | 24  0
2092     -------------------------------------
2093     Proc2  25 26 27  |  0  0 28  | 29  0
2094            30  0  0  | 31 32 33  |  0 34
2095 .ve
2096 
2097    This can be represented as a collection of submatrices as:
2098 
2099 .vb
2100       A B C
2101       D E F
2102       G H I
2103 .ve
2104 
2105    Where the submatrices A,B,C are owned by proc0, D,E,F are
2106    owned by proc1, G,H,I are owned by proc2.
2107 
2108    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2109    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2110    The 'M','N' parameters are 8,8, and have the same values on all procs.
2111 
2112    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2113    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2114    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2115    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2116    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2117    matrix, and [DF] as another SeqAIJ matrix.
2118 
2119    When d_nz, o_nz parameters are specified, d_nz storage elements are
2120    allocated for every row of the local diagonal submatrix, and o_nz
2121    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2122    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2123    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2124    In this case, the values of d_nz,o_nz are:
2125 .vb
2126      proc0 : dnz = 2, o_nz = 2
2127      proc1 : dnz = 3, o_nz = 2
2128      proc2 : dnz = 1, o_nz = 4
2129 .ve
2130    We are allocating m*(d_nz+o_nz) storage locations on every process. This
2131    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2132    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2133    34 values.
2134 
2135    When d_nnz, o_nnz parameters are specified, the storage is specified
2136    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2137    In the above case the values for d_nnz,o_nnz are:
2138 .vb
2139      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2140      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2141      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2142 .ve
2143    Here the space allocated is the sum of all the above values, i.e., 34, and
2144    hence preallocation is perfect.
2145 
2146    Level: intermediate
2147 
2148 .keywords: matrix, aij, compressed row, sparse, parallel
2149 
2150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2151 @*/
2152 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2153 {
2154   Mat_MPIAIJ   *b;
2155   int          ierr,i;
2156   PetscTruth   flg2;
2157 
2158   PetscFunctionBegin;
2159   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2160   if (!flg2) PetscFunctionReturn(0);
2161   B->preallocated = PETSC_TRUE;
2162   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2163   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2164   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2165   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2166   if (d_nnz) {
2167     for (i=0; i<B->m; i++) {
2168       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2169     }
2170   }
2171   if (o_nnz) {
2172     for (i=0; i<B->m; i++) {
2173       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2174     }
2175   }
2176   b = (Mat_MPIAIJ*)B->data;
2177 
2178   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2179   PetscLogObjectParent(B,b->A);
2180   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2181   PetscLogObjectParent(B,b->B);
2182 
2183   PetscFunctionReturn(0);
2184 }
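
/*
   Editor's sketch (not compiled): the MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   sequence for the 8x8 example in the manual page above, shown with the per-row counts of proc0.
   In a real program each of the three processes passes its own local sizes and d_nnz/o_nnz arrays.
   The helper name is hypothetical.
*/
#if 0
static int PreallocationExample(MPI_Comm comm)
{
  Mat A;
  int ierr;
  int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0's rows of the 8x8 example */

  PetscFunctionBegin;
  ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);                   /* local sizes 3,3; global sizes 8,8 */
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz,o_nz ignored when arrays given */
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif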
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatCreateMPIAIJ"
2188 /*@C
2189    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2190    (the default parallel PETSc format).  For good matrix assembly performance
2191    the user should preallocate the matrix storage by setting the parameters
2192    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2193    performance can be increased by more than a factor of 50.
2194 
2195    Collective on MPI_Comm
2196 
2197    Input Parameters:
2198 +  comm - MPI communicator
2199 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2200            This value should be the same as the local size used in creating the
2201            y vector for the matrix-vector product y = Ax.
2202 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2203        This value should be the same as the local size used in creating the
2204        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2205 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2206 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2207 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2208            (same value is used for all local rows)
2209 .  d_nnz - array containing the number of nonzeros in the various rows of the
2210            DIAGONAL portion of the local submatrix (possibly different for each row)
2211            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2212            The size of this array is equal to the number of local rows, i.e., 'm'.
2213            You must leave room for the diagonal entry even if it is zero.
2214 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2215            submatrix (same value is used for all local rows).
2216 -  o_nnz - array containing the number of nonzeros in the various rows of the
2217            OFF-DIAGONAL portion of the local submatrix (possibly different for
2218            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2219            structure. The size of this array is equal to the number
2220            of local rows, i.e., 'm'.
2221 
2222    Output Parameter:
2223 .  A - the matrix
2224 
2225    Notes:
2226    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2227    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2228    storage requirements for this matrix.
2229 
2230    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2231    processor then it must be used on all processors that share the object for
2232    that argument.
2233 
2234    The AIJ format (also called the Yale sparse matrix format or
2235    compressed row storage) is fully compatible with standard Fortran 77
2236    storage.  That is, the stored row and column indices can begin at
2237    either one (as in Fortran) or zero.  See the users manual for details.
2238 
2239    The user MUST specify either the local or global matrix dimensions
2240    (possibly both).
2241 
2242    The parallel matrix is partitioned such that the first m0 rows belong to
2243    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2244    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
2245 
2246    The DIAGONAL portion of the local submatrix of a processor can be defined
2247    as the submatrix obtained by extracting the part corresponding
2248    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2249    first row that belongs to the processor, and r2 is the last row belonging
2250    to this processor. This is a square mxm matrix. The remaining portion
2251    of the local submatrix (of size m x (N-m)) constitutes the OFF-DIAGONAL portion.
2252 
2253    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2254 
2255    By default, this format uses inodes (identical nodes) when possible.
2256    We search for consecutive rows with the same nonzero structure, thereby
2257    reusing matrix information to achieve increased efficiency.
2258 
2259    Options Database Keys:
2260 +  -mat_aij_no_inode  - Do not use inodes
2261 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2262 -  -mat_aij_oneindex - Internally use indexing starting at 1
2263         rather than 0.  Note that when calling MatSetValues(),
2264         the user still MUST index entries starting at 0!
2265 
2266 
2267    Example usage:
2268 
2269    Consider the following 8x8 matrix with 34 non-zero values, that is
2270    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2271    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2272    as follows:
2273 
2274 .vb
2275             1  2  0  |  0  3  0  |  0  4
2276     Proc0   0  5  6  |  7  0  0  |  8  0
2277             9  0 10  | 11  0  0  | 12  0
2278     -------------------------------------
2279            13  0 14  | 15 16 17  |  0  0
2280     Proc1   0 18  0  | 19 20 21  |  0  0
2281             0  0  0  | 22 23  0  | 24  0
2282     -------------------------------------
2283     Proc2  25 26 27  |  0  0 28  | 29  0
2284            30  0  0  | 31 32 33  |  0 34
2285 .ve
2286 
2287    This can be represented as a collection of submatrices as:
2288 
2289 .vb
2290       A B C
2291       D E F
2292       G H I
2293 .ve
2294 
2295    Where the submatrices A,B,C are owned by proc0, D,E,F are
2296    owned by proc1, G,H,I are owned by proc2.
2297 
2298    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2299    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2300    The 'M','N' parameters are 8,8, and have the same values on all procs.
2301 
2302    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2303    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2304    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2305    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2306    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2307    matrix, and [DF] as another SeqAIJ matrix.
2308 
2309    When d_nz, o_nz parameters are specified, d_nz storage elements are
2310    allocated for every row of the local diagonal submatrix, and o_nz
2311    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2312    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2313    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2314    In this case, the values of d_nz,o_nz are:
2315 .vb
2316      proc0 : dnz = 2, o_nz = 2
2317      proc1 : dnz = 3, o_nz = 2
2318      proc2 : dnz = 1, o_nz = 4
2319 .ve
2320    We are allocating m*(d_nz+o_nz) storage locations on every process. This
2321    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2322    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2323    34 values.
2324 
2325    When d_nnz, o_nnz parameters are specified, the storage is specified
2326    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2327    In the above case the values for d_nnz,o_nnz are:
2328 .vb
2329      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2330      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2331      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2332 .ve
2333    Here the space allocated is the sum of all the above values, i.e., 34, and
2334    hence preallocation is perfect.
2335 
2336    Level: intermediate
2337 
2338 .keywords: matrix, aij, compressed row, sparse, parallel
2339 
2340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2341 @*/
2342 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2343 {
2344   int ierr,size;
2345 
2346   PetscFunctionBegin;
2347   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2348   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2349   if (size > 1) {
2350     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2351     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2352   } else {
2353     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2354     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2355   }
2356   PetscFunctionReturn(0);
2357 }
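
/*
   Editor's sketch (not compiled): the one-call MatCreateMPIAIJ() route for the same 8x8 example,
   shown with proc1's per-row counts, followed by the usual MatSetValues()/assembly cycle over the
   locally owned rows.  Each process passes its own local sizes and counts; the helper name and the
   empty row loop are hypothetical placeholders.
*/
#if 0
static int CreateMPIAIJExample(MPI_Comm comm)
{
  Mat    A;
  int    ierr,i,rstart,rend,ncols,cols[8];
  Scalar vals[8];
  int    d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};  /* proc1's rows of the 8x8 example */

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ncols = 0; /* fill cols[]/vals[] with the global column indices and values of row i */
    ierr  = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif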
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2361 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2362 {
2363   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2364   PetscFunctionBegin;
2365   *Ad     = a->A;
2366   *Ao     = a->B;
2367   *colmap = a->garray;
2368   PetscFunctionReturn(0);
2369 }
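
/*
   Editor's sketch (not compiled): interpreting the pieces returned by MatMPIAIJGetSeqAIJ() above.
   Ad is the diagonal block, Ao is the off-diagonal block stored with compressed local column
   numbering, and colmap[j] (the garray) gives the global column of local column j of Ao.  The
   helper name is hypothetical; row 0 of Ao is examined only as an illustration.
*/
#if 0
static int GetSeqAIJExample(Mat A)
{
  Mat    Ad,Ao;
  int    ierr,*cmap,m,n,ncols,*cols,j;
  Scalar *vals;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Ao,&m,&n);CHKERRQ(ierr);
  if (m) {
    ierr = MatGetRow(Ao,0,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<ncols; j++) {
      /* cols[j] is a local column of Ao; cmap[cols[j]] is the corresponding global column */
      ierr = PetscPrintf(PETSC_COMM_SELF,"off-diagonal entry in global column %d\n",cmap[cols[j]]);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(Ao,0,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif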
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2373 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2374 {
2375   int        ierr;
2376   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2377 
2378   PetscFunctionBegin;
2379   if (coloring->ctype == IS_COLORING_LOCAL) {
2380     int        *allcolors,*colors,i;
2381     ISColoring ocoloring;
2382 
2383     /* set coloring for diagonal portion */
2384     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2385 
2386     /* set coloring for off-diagonal portion */
2387     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2388     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2389     for (i=0; i<a->B->n; i++) {
2390       colors[i] = allcolors[a->garray[i]];
2391     }
2392     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2393     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2394     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2395     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2396   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2397     int        *colors,i,*larray;
2398     ISColoring ocoloring;
2399 
2400     /* set coloring for diagonal portion */
2401     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2402     for (i=0; i<a->A->n; i++) {
2403       larray[i] = i + a->cstart;
2404     }
2405     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2406     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2407     for (i=0; i<a->A->n; i++) {
2408       colors[i] = coloring->colors[larray[i]];
2409     }
2410     ierr = PetscFree(larray);CHKERRQ(ierr);
2411     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2412     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2413     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2414 
2415     /* set coloring for off-diagonal portion */
2416     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2417     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2418     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2419     for (i=0; i<a->B->n; i++) {
2420       colors[i] = coloring->colors[larray[i]];
2421     }
2422     ierr = PetscFree(larray);CHKERRQ(ierr);
2423     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2424     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2425     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2426   } else {
2427     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2428   }
2429 
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2435 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2436 {
2437   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2438   int        ierr;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2442   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2448 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2449 {
2450   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2451   int        ierr;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2455   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458