1 /*$Id: mpiaij.c,v 1.336 2001/06/01 14:57:20 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
18 a slightly higher hash-table cost; without it, it is not scalable (each processor
19 has an order-N integer array) but is fast to access.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
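/*
   Illustrative sketch (not part of the original source): once the colmap has
   been built by CreateColmap_MPIAIJ_Private(), a global column number is
   translated into a local column index of the off-diagonal block exactly as
   done later in MatSetValues_MPIAIJ(); a result < 0 means the column is not
   present locally.  The names gcol and lcol are hypothetical.
*/
#if 0
{
  int lcol,ierr;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                        /* the table stores indices shifted by one */
#else
  lcol = aij->colmap[gcol] - 1;  /* a zero entry means "column not present" */
#endif
}
#endif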
42 
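/*
   The two macros below perform an inlined insertion of a single (col,value)
   pair into one row of a SeqAIJ matrix: MatSetValues_SeqAIJ_A_Private() works
   on the local "diagonal" block aij->A, MatSetValues_SeqAIJ_B_Private() on the
   off-diagonal block aij->B.  Each locates the column with a few bisection
   steps followed by a linear scan, adds to or overwrites the value if the
   column is already present, and otherwise inserts it in sorted order; when a
   row has no free slot left the entire storage is reallocated with CHUNKSIZE
   extra entries for that row.  Both rely on the local variables declared in
   MatSetValues_MPIAIJ().
*/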
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         Scalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         Scalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
197   Scalar     value;
198   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int        cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth roworiented = aij->roworiented;
201 
202   /* Some variables required in the macros */
203   Mat        A = aij->A;
204   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
205   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   Scalar     *aa = a->a;
207   PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat        B = aij->B;
209   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
210   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   Scalar     *ba = b->a;
212 
213   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int        nonew = a->nonew,shift = a->indexshift;
215   Scalar     *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   Scalar      *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled, if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled thus we can skip this stuff
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry, which would
533      allocate new memory, causing lots of mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for my implementation*/
621   /* but may not always be true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in zz until the next line, which is true for my implementation */
642   /* but may not always be true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
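/*
   Illustrative sketch (not part of the original source): because only the
   diagonal block a->A is consulted above, the routine requires a square
   matrix whose row and column partitions coincide.  A typical call, assuming
   an assembled square MPIAIJ matrix A and a vector d with the same row
   layout, would be:
*/
#if 0
{
  Vec d;
  int ierr;
  ierr = VecCreateMPI(A->comm,A->m,A->M,&d);CHKERRQ(ierr); /* same layout as the rows of A */
  ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);                /* dispatches to MatGetDiagonal_MPIAIJ() */
  ierr = VecDestroy(d);CHKERRQ(ierr);
}
#endif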
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(Scalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     Scalar      *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Everyone has to participate in this call to draw the matrix since the graphics waits are
798        synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 /*
832     This has to provide several versions.
833 
834      2) a) use only local smoothing, updating outer values only once.
835         b) local smoothing, updating outer values each inner iteration
836      3) coloring, updating outer values between colors.
837 */
838 #undef __FUNCT__
839 #define __FUNCT__ "MatRelax_MPIAIJ"
840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
841 {
842   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
843   Mat        AA = mat->A,BB = mat->B;
844   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
845   Scalar     *b,*x,*xs,*ls,d,*v,sum;
846   int        ierr,*idx,*diag;
847   int        n = matin->n,m = matin->m,i,shift = A->indexshift;
848 
849   PetscFunctionBegin;
850   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
851   diag = A->diag;
852   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
853     if (flag & SOR_ZERO_INITIAL_GUESS) {
854       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
855       PetscFunctionReturn(0);
856     }
857     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
858     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
859     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
860     if (xx != bb) {
861       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
862     } else {
863       b = x;
864     }
865     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
866     xs = x + shift; /* shift by one for index start of 1 */
867     ls = ls + shift;
868     while (its--) {
869       /* go down through the rows */
870       for (i=0; i<m; i++) {
871         n    = A->i[i+1] - A->i[i];
872 	PetscLogFlops(4*n+3);
873         idx  = A->j + A->i[i] + shift;
874         v    = A->a + A->i[i] + shift;
875         sum  = b[i];
876         SPARSEDENSEMDOT(sum,xs,v,idx,n);
877         d    = fshift + A->a[diag[i]+shift];
878         n    = B->i[i+1] - B->i[i];
879         idx  = B->j + B->i[i] + shift;
880         v    = B->a + B->i[i] + shift;
881         SPARSEDENSEMDOT(sum,ls,v,idx,n);
882         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
883       }
884       /* come up through the rows */
885       for (i=m-1; i>-1; i--) {
886         n    = A->i[i+1] - A->i[i];
887 	PetscLogFlops(4*n+3);
888         idx  = A->j + A->i[i] + shift;
889         v    = A->a + A->i[i] + shift;
890         sum  = b[i];
891         SPARSEDENSEMDOT(sum,xs,v,idx,n);
892         d    = fshift + A->a[diag[i]+shift];
893         n    = B->i[i+1] - B->i[i];
894         idx  = B->j + B->i[i] + shift;
895         v    = B->a + B->i[i] + shift;
896         SPARSEDENSEMDOT(sum,ls,v,idx,n);
897         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
898       }
899     }
900     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
902     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
903   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
904     if (flag & SOR_ZERO_INITIAL_GUESS) {
905       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
906       PetscFunctionReturn(0);
907     }
908     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
909     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
910     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
911     if (xx != bb) {
912       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
913     } else {
914       b = x;
915     }
916     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
917     xs = x + shift; /* shift by one for index start of 1 */
918     ls = ls + shift;
919     while (its--) {
920       for (i=0; i<m; i++) {
921         n    = A->i[i+1] - A->i[i];
922 	PetscLogFlops(4*n+3);
923         idx  = A->j + A->i[i] + shift;
924         v    = A->a + A->i[i] + shift;
925         sum  = b[i];
926         SPARSEDENSEMDOT(sum,xs,v,idx,n);
927         d    = fshift + A->a[diag[i]+shift];
928         n    = B->i[i+1] - B->i[i];
929         idx  = B->j + B->i[i] + shift;
930         v    = B->a + B->i[i] + shift;
931         SPARSEDENSEMDOT(sum,ls,v,idx,n);
932         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
933       }
934     }
935     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
936     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
937     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
938   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
939     if (flag & SOR_ZERO_INITIAL_GUESS) {
940       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
941       PetscFunctionReturn(0);
942     }
943     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
944     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
945     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
946     if (xx != bb) {
947       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
948     } else {
949       b = x;
950     }
951     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
952     xs = x + shift; /* shift by one for index start of 1 */
953     ls = ls + shift;
954     while (its--) {
955       for (i=m-1; i>-1; i--) {
956         n    = A->i[i+1] - A->i[i];
957 	PetscLogFlops(4*n+3);
958         idx  = A->j + A->i[i] + shift;
959         v    = A->a + A->i[i] + shift;
960         sum  = b[i];
961         SPARSEDENSEMDOT(sum,xs,v,idx,n);
962         d    = fshift + A->a[diag[i]+shift];
963         n    = B->i[i+1] - B->i[i];
964         idx  = B->j + B->i[i] + shift;
965         v    = B->a + B->i[i] + shift;
966         SPARSEDENSEMDOT(sum,ls,v,idx,n);
967         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
968       }
969     }
970     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
972     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
973   } else {
974     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatGetInfo_MPIAIJ"
981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
982 {
983   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
984   Mat        A = mat->A,B = mat->B;
985   int        ierr;
986   PetscReal  isend[5],irecv[5];
987 
988   PetscFunctionBegin;
989   info->block_size     = 1.0;
990   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
991   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
992   isend[3] = info->memory;  isend[4] = info->mallocs;
993   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
994   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
995   isend[3] += info->memory;  isend[4] += info->mallocs;
996   if (flag == MAT_LOCAL) {
997     info->nz_used      = isend[0];
998     info->nz_allocated = isend[1];
999     info->nz_unneeded  = isend[2];
1000     info->memory       = isend[3];
1001     info->mallocs      = isend[4];
1002   } else if (flag == MAT_GLOBAL_MAX) {
1003     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1004     info->nz_used      = irecv[0];
1005     info->nz_allocated = irecv[1];
1006     info->nz_unneeded  = irecv[2];
1007     info->memory       = irecv[3];
1008     info->mallocs      = irecv[4];
1009   } else if (flag == MAT_GLOBAL_SUM) {
1010     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1011     info->nz_used      = irecv[0];
1012     info->nz_allocated = irecv[1];
1013     info->nz_unneeded  = irecv[2];
1014     info->memory       = irecv[3];
1015     info->mallocs      = irecv[4];
1016   }
1017   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1018   info->fill_ratio_needed = 0;
1019   info->factor_mallocs    = 0;
1020   info->rows_global       = (double)matin->M;
1021   info->columns_global    = (double)matin->N;
1022   info->rows_local        = (double)matin->m;
1023   info->columns_local     = (double)matin->N;
1024 
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatSetOption_MPIAIJ"
1030 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1033   int        ierr;
1034 
1035   PetscFunctionBegin;
1036   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1037       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1038       op == MAT_COLUMNS_UNSORTED ||
1039       op == MAT_COLUMNS_SORTED ||
1040       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1041       op == MAT_KEEP_ZEROED_ROWS ||
1042       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1043       op == MAT_USE_INODES ||
1044       op == MAT_DO_NOT_USE_INODES ||
1045       op == MAT_IGNORE_ZERO_ENTRIES) {
1046         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1047         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1048   } else if (op == MAT_ROW_ORIENTED) {
1049     a->roworiented = PETSC_TRUE;
1050     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1051     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1052   } else if (op == MAT_ROWS_SORTED ||
1053              op == MAT_ROWS_UNSORTED ||
1054              op == MAT_YES_NEW_DIAGONALS) {
1055     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1056   } else if (op == MAT_COLUMN_ORIENTED) {
1057     a->roworiented = PETSC_FALSE;
1058     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1059     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1060   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1061     a->donotstash = PETSC_TRUE;
1062   } else if (op == MAT_NO_NEW_DIAGONALS){
1063     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1064   } else {
1065     SETERRQ(PETSC_ERR_SUP,"unknown option");
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatGetOwnershipRange_MPIAIJ"
1072 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1073 {
1074   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1075 
1076   PetscFunctionBegin;
1077   if (m) *m = mat->rstart;
1078   if (n) *n = mat->rend;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatGetRow_MPIAIJ"
1084 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1085 {
1086   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1087   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1088   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1089   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1090   int        *cmap,*idx_p;
1091 
1092   PetscFunctionBegin;
1093   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1094   mat->getrowactive = PETSC_TRUE;
1095 
1096   if (!mat->rowvalues && (idx || v)) {
1097     /*
1098         allocate enough space to hold information from the longest row.
1099     */
1100     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1101     int     max = 1,tmp;
1102     for (i=0; i<matin->m; i++) {
1103       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1104       if (max < tmp) { max = tmp; }
1105     }
1106     ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr);
1107     mat->rowindices = (int*)(mat->rowvalues + max);
1108   }
1109 
1110   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1111   lrow = row - rstart;
1112 
1113   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1114   if (!v)   {pvA = 0; pvB = 0;}
1115   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1116   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1117   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1118   nztot = nzA + nzB;
1119 
1120   cmap  = mat->garray;
1121   if (v  || idx) {
1122     if (nztot) {
1123       /* Sort by increasing column numbers, assuming A and B already sorted */
1124       int imark = -1;
1125       if (v) {
1126         *v = v_p = mat->rowvalues;
1127         for (i=0; i<nzB; i++) {
1128           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1129           else break;
1130         }
1131         imark = i;
1132         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1133         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1134       }
1135       if (idx) {
1136         *idx = idx_p = mat->rowindices;
1137         if (imark > -1) {
1138           for (i=0; i<imark; i++) {
1139             idx_p[i] = cmap[cworkB[i]];
1140           }
1141         } else {
1142           for (i=0; i<nzB; i++) {
1143             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1144             else break;
1145           }
1146           imark = i;
1147         }
1148         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1149         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1150       }
1151     } else {
1152       if (idx) *idx = 0;
1153       if (v)   *v   = 0;
1154     }
1155   }
1156   *nz = nztot;
1157   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1158   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1164 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1165 {
1166   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1167 
1168   PetscFunctionBegin;
1169   if (aij->getrowactive == PETSC_FALSE) {
1170     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1171   }
1172   aij->getrowactive = PETSC_FALSE;
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatNorm_MPIAIJ"
1178 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1179 {
1180   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1181   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1182   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1183   PetscReal  sum = 0.0;
1184   Scalar     *v;
1185 
1186   PetscFunctionBegin;
1187   if (aij->size == 1) {
1188     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1189   } else {
1190     if (type == NORM_FROBENIUS) {
1191       v = amat->a;
1192       for (i=0; i<amat->nz; i++) {
1193 #if defined(PETSC_USE_COMPLEX)
1194         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1195 #else
1196         sum += (*v)*(*v); v++;
1197 #endif
1198       }
1199       v = bmat->a;
1200       for (i=0; i<bmat->nz; i++) {
1201 #if defined(PETSC_USE_COMPLEX)
1202         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1203 #else
1204         sum += (*v)*(*v); v++;
1205 #endif
1206       }
1207       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1208       *norm = sqrt(*norm);
1209     } else if (type == NORM_1) { /* max column norm */
1210       PetscReal *tmp,*tmp2;
1211       int    *jj,*garray = aij->garray;
1212       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1213       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1214       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1215       *norm = 0.0;
1216       v = amat->a; jj = amat->j;
1217       for (j=0; j<amat->nz; j++) {
1218         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1219       }
1220       v = bmat->a; jj = bmat->j;
1221       for (j=0; j<bmat->nz; j++) {
1222         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1223       }
1224       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1225       for (j=0; j<mat->N; j++) {
1226         if (tmp2[j] > *norm) *norm = tmp2[j];
1227       }
1228       ierr = PetscFree(tmp);CHKERRQ(ierr);
1229       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1230     } else if (type == NORM_INFINITY) { /* max row norm */
1231       PetscReal ntemp = 0.0;
1232       for (j=0; j<aij->A->m; j++) {
1233         v = amat->a + amat->i[j] + shift;
1234         sum = 0.0;
1235         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1236           sum += PetscAbsScalar(*v); v++;
1237         }
1238         v = bmat->a + bmat->i[j] + shift;
1239         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1240           sum += PetscAbsScalar(*v); v++;
1241         }
1242         if (sum > ntemp) ntemp = sum;
1243       }
1244       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1245     } else {
1246       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1247     }
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatTranspose_MPIAIJ"
1254 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1255 {
1256   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1257   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1258   int        ierr,shift = Aloc->indexshift;
1259   int        M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1260   Mat        B;
1261   Scalar     *array;
1262 
1263   PetscFunctionBegin;
1264   if (!matout && M != N) {
1265     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1266   }
1267 
1268   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1269 
1270   /* copy over the A part */
1271   Aloc = (Mat_SeqAIJ*)a->A->data;
1272   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1273   row = a->rstart;
1274   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1275   for (i=0; i<m; i++) {
1276     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1277     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1278   }
1279   aj = Aloc->j;
1280   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1281 
1282   /* copy over the B part */
1283   Aloc = (Mat_SeqAIJ*)a->B->data;
1284   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1285   row  = a->rstart;
1286   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1287   ct   = cols;
1288   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1289   for (i=0; i<m; i++) {
1290     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1291     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1292   }
1293   ierr = PetscFree(ct);CHKERRQ(ierr);
1294   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1295   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296   if (matout) {
1297     *matout = B;
1298   } else {
1299     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1306 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1307 {
1308   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1309   Mat        a = aij->A,b = aij->B;
1310   int        ierr,s1,s2,s3;
1311 
1312   PetscFunctionBegin;
1313   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1314   if (rr) {
1315     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1316     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1317     /* Overlap communication with computation. */
1318     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1319   }
1320   if (ll) {
1321     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1322     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1323     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1324   }
1325   /* scale  the diagonal block */
1326   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1327 
1328   if (rr) {
1329     /* Do a scatter end and then right scale the off-diagonal block */
1330     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1331     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1332   }
1333 
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1340 int MatPrintHelp_MPIAIJ(Mat A)
1341 {
1342   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1343   int        ierr;
1344 
1345   PetscFunctionBegin;
1346   if (!a->rank) {
1347     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1354 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1355 {
1356   PetscFunctionBegin;
1357   *bs = 1;
1358   PetscFunctionReturn(0);
1359 }
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1362 int MatSetUnfactored_MPIAIJ(Mat A)
1363 {
1364   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1365   int        ierr;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatEqual_MPIAIJ"
1374 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1375 {
1376   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1377   Mat        a,b,c,d;
1378   PetscTruth flg;
1379   int        ierr;
1380 
1381   PetscFunctionBegin;
1382   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1383   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1384   a = matA->A; b = matA->B;
1385   c = matB->A; d = matB->B;
1386 
1387   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1388   if (flg == PETSC_TRUE) {
1389     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1390   }
1391   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatCopy_MPIAIJ"
1397 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1398 {
1399   int        ierr;
1400   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1401   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1402   PetscTruth flg;
1403 
1404   PetscFunctionBegin;
1405   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1406   if (str != SAME_NONZERO_PATTERN || !flg) {
1407     /* because of the column compression in the off-processor part of the matrix a->B,
1408        the number of columns in a->B and b->B may be different, hence we cannot call
1409        MatCopy() directly on the two parts. If need be, a copy more efficient than
1410        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1411        then copying the submatrices */
1412     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1413   } else {
1414     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1415     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1422 int MatSetUpPreallocation_MPIAIJ(Mat A)
1423 {
1424   int        ierr;
1425 
1426   PetscFunctionBegin;
1427   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1432 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1433 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1434 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1435 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1436 #if !defined(PETSC_USE_COMPLEX)
1437 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1438 #endif
1439 
1440 /* -------------------------------------------------------------------*/
1441 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1442        MatGetRow_MPIAIJ,
1443        MatRestoreRow_MPIAIJ,
1444        MatMult_MPIAIJ,
1445        MatMultAdd_MPIAIJ,
1446        MatMultTranspose_MPIAIJ,
1447        MatMultTransposeAdd_MPIAIJ,
1448        0,
1449        0,
1450        0,
1451        0,
1452        0,
1453        0,
1454        MatRelax_MPIAIJ,
1455        MatTranspose_MPIAIJ,
1456        MatGetInfo_MPIAIJ,
1457        MatEqual_MPIAIJ,
1458        MatGetDiagonal_MPIAIJ,
1459        MatDiagonalScale_MPIAIJ,
1460        MatNorm_MPIAIJ,
1461        MatAssemblyBegin_MPIAIJ,
1462        MatAssemblyEnd_MPIAIJ,
1463        0,
1464        MatSetOption_MPIAIJ,
1465        MatZeroEntries_MPIAIJ,
1466        MatZeroRows_MPIAIJ,
1467 #if !defined(PETSC_USE_COMPLEX)
1468 				       MatLUFactorSymbolic_MPIAIJ_TFS,
1469 #else
1470        0,
1471 #endif
1472        0,
1473        0,
1474        0,
1475        MatSetUpPreallocation_MPIAIJ,
1476        0,
1477        MatGetOwnershipRange_MPIAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482        MatDuplicate_MPIAIJ,
1483        0,
1484        0,
1485        0,
1486        0,
1487        0,
1488        MatGetSubMatrices_MPIAIJ,
1489        MatIncreaseOverlap_MPIAIJ,
1490        MatGetValues_MPIAIJ,
1491        MatCopy_MPIAIJ,
1492        MatPrintHelp_MPIAIJ,
1493        MatScale_MPIAIJ,
1494        0,
1495        0,
1496        0,
1497        MatGetBlockSize_MPIAIJ,
1498        0,
1499        0,
1500        0,
1501        0,
1502        MatFDColoringCreate_MPIAIJ,
1503        0,
1504        MatSetUnfactored_MPIAIJ,
1505        0,
1506        0,
1507        MatGetSubMatrix_MPIAIJ,
1508        MatDestroy_MPIAIJ,
1509        MatView_MPIAIJ,
1510        MatGetMaps_Petsc,
1511        0,
1512        0,
1513        0,
1514        0,
1515        0,
1516        0,
1517        0,
1518        0,
1519        MatSetColoring_MPIAIJ,
1520        MatSetValuesAdic_MPIAIJ,
1521        MatSetValuesAdifor_MPIAIJ
1522 };
1523 
1524 /* ----------------------------------------------------------------------------------------*/
1525 
1526 EXTERN_C_BEGIN
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1529 int MatStoreValues_MPIAIJ(Mat mat)
1530 {
1531   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1532   int        ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1536   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 EXTERN_C_END
1540 
1541 EXTERN_C_BEGIN
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1544 int MatRetrieveValues_MPIAIJ(Mat mat)
1545 {
1546   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1547   int        ierr;
1548 
1549   PetscFunctionBegin;
1550   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1551   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 EXTERN_C_END
1555 
1556 #include "petscpc.h"
1557 EXTERN_C_BEGIN
1558 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1559 EXTERN_C_END
1560 
1561 EXTERN_C_BEGIN
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatCreate_MPIAIJ"
1564 int MatCreate_MPIAIJ(Mat B)
1565 {
1566   Mat_MPIAIJ   *b;
1567   int          ierr,i,size;
1568 
1569   PetscFunctionBegin;
1570   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1571 
1572   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1573   B->data         = (void*)b;
1574   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1575   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1576   B->factor       = 0;
1577   B->assembled    = PETSC_FALSE;
1578   B->mapping      = 0;
1579 
1580   B->insertmode      = NOT_SET_VALUES;
1581   b->size            = size;
1582   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1583 
1584   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1585   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1586 
1587   /* the information in the maps duplicates the information computed below, eventually
1588      we should remove the duplicate information that is not contained in the maps */
1589   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1590   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1591 
1592   /* build local table of row and column ownerships */
1593   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1594   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1595   b->cowners = b->rowners + b->size + 2;
1596   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1597   b->rowners[0] = 0;
1598   for (i=2; i<=b->size; i++) {
1599     b->rowners[i] += b->rowners[i-1];
1600   }
1601   b->rstart = b->rowners[b->rank];
1602   b->rend   = b->rowners[b->rank+1];
1603   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1604   b->cowners[0] = 0;
1605   for (i=2; i<=b->size; i++) {
1606     b->cowners[i] += b->cowners[i-1];
1607   }
1608   b->cstart = b->cowners[b->rank];
1609   b->cend   = b->cowners[b->rank+1];
1610 
1611   /* build cache for off array entries formed */
1612   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1613   b->donotstash  = PETSC_FALSE;
1614   b->colmap      = 0;
1615   b->garray      = 0;
1616   b->roworiented = PETSC_TRUE;
1617 
1618   /* stuff used for matrix vector multiply */
1619   b->lvec      = PETSC_NULL;
1620   b->Mvctx     = PETSC_NULL;
1621 
1622   /* stuff for MatGetRow() */
1623   b->rowindices   = 0;
1624   b->rowvalues    = 0;
1625   b->getrowactive = PETSC_FALSE;
1626 
1627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1628                                      "MatStoreValues_MPIAIJ",
1629                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1631                                      "MatRetrieveValues_MPIAIJ",
1632                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1634                                      "MatGetDiagonalBlock_MPIAIJ",
1635                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1636   PetscFunctionReturn(0);
1637 }
1638 EXTERN_C_END
1639 
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1642 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1643 {
1644   Mat        mat;
1645   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1646   int        ierr;
1647 
1648   PetscFunctionBegin;
1649   *newmat       = 0;
1650   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1651   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1652   a    = (Mat_MPIAIJ*)mat->data;
1653   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1654   mat->factor       = matin->factor;
1655   mat->assembled    = PETSC_TRUE;
1656   mat->insertmode   = NOT_SET_VALUES;
1657   mat->preallocated = PETSC_TRUE;
1658 
1659   a->rstart       = oldmat->rstart;
1660   a->rend         = oldmat->rend;
1661   a->cstart       = oldmat->cstart;
1662   a->cend         = oldmat->cend;
1663   a->size         = oldmat->size;
1664   a->rank         = oldmat->rank;
1665   a->donotstash   = oldmat->donotstash;
1666   a->roworiented  = oldmat->roworiented;
1667   a->rowindices   = 0;
1668   a->rowvalues    = 0;
1669   a->getrowactive = PETSC_FALSE;
1670 
1671   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1672   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1673   if (oldmat->colmap) {
1674 #if defined (PETSC_USE_CTABLE)
1675     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1676 #else
1677     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1678     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1679     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1680 #endif
1681   } else a->colmap = 0;
1682   if (oldmat->garray) {
1683     int len;
1684     len  = oldmat->B->n;
1685     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1686     PetscLogObjectMemory(mat,len*sizeof(int));
1687     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1688   } else a->garray = 0;
1689 
1690   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1691   PetscLogObjectParent(mat,a->lvec);
1692   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1693   PetscLogObjectParent(mat,a->Mvctx);
1694   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1695   PetscLogObjectParent(mat,a->A);
1696   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1697   PetscLogObjectParent(mat,a->B);
1698   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1699   *newmat = mat;
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #include "petscsys.h"
1704 
1705 EXTERN_C_BEGIN
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatLoad_MPIAIJ"
1708 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1709 {
1710   Mat          A;
1711   Scalar       *vals,*svals;
1712   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1713   MPI_Status   status;
1714   int          i,nz,ierr,j,rstart,rend,fd;
1715   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1716   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1717   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1718 
1719   PetscFunctionBegin;
1720   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1721   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1722   if (!rank) {
1723     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1724     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1725     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1726     if (header[3] < 0) {
1727       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1728     }
1729   }
1730 
1731   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1732   M = header[1]; N = header[2];
1733   /* determine ownership of all rows */
1734   m = M/size + ((M % size) > rank);
1735   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1736   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1737   rowners[0] = 0;
1738   for (i=2; i<=size; i++) {
1739     rowners[i] += rowners[i-1];
1740   }
1741   rstart = rowners[rank];
1742   rend   = rowners[rank+1];
1743 
1744   /* distribute row lengths to all processors */
1745   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1746   offlens = ourlens + (rend-rstart);
1747   if (!rank) {
1748     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1749     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1750     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1751     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1752     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1753     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1754   } else {
1755     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1756   }
1757 
1758   if (!rank) {
1759     /* calculate the number of nonzeros on each processor */
1760     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1761     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1762     for (i=0; i<size; i++) {
1763       for (j=rowners[i]; j< rowners[i+1]; j++) {
1764         procsnz[i] += rowlengths[j];
1765       }
1766     }
1767     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1768 
1769     /* determine max buffer needed and allocate it */
1770     maxnz = 0;
1771     for (i=0; i<size; i++) {
1772       maxnz = PetscMax(maxnz,procsnz[i]);
1773     }
1774     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1775 
1776     /* read in my part of the matrix column indices  */
1777     nz   = procsnz[0];
1778     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1779     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1780 
1781     /* read in everyone else's part and ship it off */
1782     for (i=1; i<size; i++) {
1783       nz   = procsnz[i];
1784       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1785       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1786     }
1787     ierr = PetscFree(cols);CHKERRQ(ierr);
1788   } else {
1789     /* determine buffer space needed for message */
1790     nz = 0;
1791     for (i=0; i<m; i++) {
1792       nz += ourlens[i];
1793     }
1794     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1795 
1796     /* receive message of column indices*/
1797     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1798     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1799     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1800   }
1801 
1802   /* determine column ownership if matrix is not square */
1803   if (N != M) {
1804     n      = N/size + ((N % size) > rank);
1805     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1806     cstart = cend - n;
1807   } else {
1808     cstart = rstart;
1809     cend   = rend;
1810     n      = cend - cstart;
1811   }
1812 
1813   /* loop over local rows, determining number of off diagonal entries */
1814   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1815   jj = 0;
1816   for (i=0; i<m; i++) {
1817     for (j=0; j<ourlens[i]; j++) {
1818       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1819       jj++;
1820     }
1821   }
1822 
1823   /* create our matrix */
1824   for (i=0; i<m; i++) {
1825     ourlens[i] -= offlens[i];
1826   }
1827   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1828   A = *newmat;
1829   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1830   for (i=0; i<m; i++) {
1831     ourlens[i] += offlens[i];
1832   }
1833 
1834   if (!rank) {
1835     ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1836 
1837     /* read in my part of the matrix numerical values  */
1838     nz   = procsnz[0];
1839     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1840 
1841     /* insert into matrix */
1842     jj      = rstart;
1843     smycols = mycols;
1844     svals   = vals;
1845     for (i=0; i<m; i++) {
1846       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1847       smycols += ourlens[i];
1848       svals   += ourlens[i];
1849       jj++;
1850     }
1851 
1852     /* read in other processors and ship out */
1853     for (i=1; i<size; i++) {
1854       nz   = procsnz[i];
1855       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1856       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1857     }
1858     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1859   } else {
1860     /* receive numeric values */
1861     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr);
1862 
1863     /* receive message of values*/
1864     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1865     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1866     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1867 
1868     /* insert into matrix */
1869     jj      = rstart;
1870     smycols = mycols;
1871     svals   = vals;
1872     for (i=0; i<m; i++) {
1873       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1874       smycols += ourlens[i];
1875       svals   += ourlens[i];
1876       jj++;
1877     }
1878   }
1879   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1880   ierr = PetscFree(vals);CHKERRQ(ierr);
1881   ierr = PetscFree(mycols);CHKERRQ(ierr);
1882   ierr = PetscFree(rowners);CHKERRQ(ierr);
1883 
1884   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 EXTERN_C_END
1889 
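/*
   A minimal usage sketch for loading through the routine above, assuming the 2.x binary
   viewer interface (the exact name of the read-only mode flag is not guaranteed here):

       PetscViewer viewer;
       Mat         A;
       int         ierr;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   MatLoad() dispatches to MatLoad_MPIAIJ() above; rows are spread as evenly as possible,
   M/size per process with the remainder going to the lower ranks, as computed from the
   file header.
*/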
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1892 /*
1893     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
1894   in local storage, and then the final result formed by concatenating the local matrices.
1895   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
1896 */
1897 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1898 {
1899   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1900   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1901   Mat        *local,M,Mreuse;
1902   Scalar     *vwork,*aa;
1903   MPI_Comm   comm = mat->comm;
1904   Mat_SeqAIJ *aij;
1905 
1906 
1907   PetscFunctionBegin;
1908   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1910 
1911   if (call ==  MAT_REUSE_MATRIX) {
1912     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1913     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1914     local = &Mreuse;
1915     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1916   } else {
1917     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1918     Mreuse = *local;
1919     ierr = PetscFree(local);CHKERRQ(ierr);
1920   }
1921 
1922   /*
1923       m - number of local rows
1924       n - number of columns (same on all processors)
1925       rstart - first row in new global matrix generated
1926   */
1927   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1928   if (call == MAT_INITIAL_MATRIX) {
1929     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1930     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1931     ii  = aij->i;
1932     jj  = aij->j;
1933 
1934     /*
1935         Determine the number of non-zeros in the diagonal and off-diagonal
1936         portions of the matrix in order to do correct preallocation
1937     */
1938 
1939     /* first get start and end of "diagonal" columns */
1940     if (csize == PETSC_DECIDE) {
1941       nlocal = n/size + ((n % size) > rank);
1942     } else {
1943       nlocal = csize;
1944     }
1945     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1946     rstart = rend - nlocal;
1947     if (rank == size - 1 && rend != n) {
1948       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1949     }
1950 
1951     /* next, compute all the lengths */
1952     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1953     olens = dlens + m;
1954     for (i=0; i<m; i++) {
1955       jend = ii[i+1] - ii[i];
1956       olen = 0;
1957       dlen = 0;
1958       for (j=0; j<jend; j++) {
1959         if (*jj < rstart || *jj >= rend) olen++;
1960         else dlen++;
1961         jj++;
1962       }
1963       olens[i] = olen;
1964       dlens[i] = dlen;
1965     }
1966     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1967     ierr = PetscFree(dlens);CHKERRQ(ierr);
1968   } else {
1969     int ml,nl;
1970 
1971     M = *newmat;
1972     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1973     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1974     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1975     /*
1976          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1977        rather than the slower MatSetValues().
1978     */
1979     M->was_assembled = PETSC_TRUE;
1980     M->assembled     = PETSC_FALSE;
1981   }
1982   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1983   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1984   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1985   ii  = aij->i;
1986   jj  = aij->j;
1987   aa  = aij->a;
1988   for (i=0; i<m; i++) {
1989     row   = rstart + i;
1990     nz    = ii[i+1] - ii[i];
1991     cwork = jj;     jj += nz;
1992     vwork = aa;     aa += nz;
1993     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1994   }
1995 
1996   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   *newmat = M;
1999 
2000   /* save submatrix used in processor for next request */
2001   if (call ==  MAT_INITIAL_MATRIX) {
2002     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2003     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2004   }
2005 
2006   PetscFunctionReturn(0);
2007 }
2008 
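/*
   A minimal usage sketch, assuming the public MatGetSubMatrix() has the same calling
   sequence as the routine above (nloc and first are placeholder names for the row/column
   range each process requests):

       IS  isrow,iscol;
       Mat sub;
       int ierr;
       ierr = ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   With csize = PETSC_DECIDE the new column layout is chosen as in the code above; a later
   call with MAT_REUSE_MATRIX and the same index sets reuses the "SubMatrix" object composed
   on sub at the end of the routine.
*/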
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2011 /*@C
2012    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2013    (the default parallel PETSc format).  For good matrix assembly performance
2014    the user should preallocate the matrix storage by setting the parameters
2015    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2016    performance can be increased by more than a factor of 50.
2017 
2018    Collective on MPI_Comm
2019 
2020    Input Parameters:
2021 +  A - the matrix
2022 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2023            (same value is used for all local rows)
2024 .  d_nnz - array containing the number of nonzeros in the various rows of the
2025            DIAGONAL portion of the local submatrix (possibly different for each row)
2026            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2027            The size of this array is equal to the number of local rows, i.e. 'm'.
2028            You must leave room for the diagonal entry even if it is zero.
2029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2030            submatrix (same value is used for all local rows).
2031 -  o_nnz - array containing the number of nonzeros in the various rows of the
2032            OFF-DIAGONAL portion of the local submatrix (possibly different for
2033            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2034            structure. The size of this array is equal to the number
2035            of local rows, i.e. 'm'.
2036 
2037    The AIJ format (also called the Yale sparse matrix format or
2038    compressed row storage) is fully compatible with standard Fortran 77
2039    storage.  That is, the stored row and column indices can begin at
2040    either one (as in Fortran) or zero.  See the users manual for details.
2041 
2042    The user MUST specify either the local or global matrix dimensions
2043    (possibly both).
2044 
2045    The parallel matrix is partitioned such that the first m0 rows belong to
2046    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2047    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2048 
2049    The DIAGONAL portion of the local submatrix of a processor can be defined
2050    as the submatrix which is obtained by extracting the part corresponding
2051    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2052    first row that belongs to the processor, and r2 is the last row belonging
2053    to this processor. This is a square mxm matrix. The remaining portion
2054    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2055 
2056    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2057 
2058    By default, this format uses inodes (identical nodes) when possible.
2059    We search for consecutive rows with the same nonzero structure, thereby
2060    reusing matrix information to achieve increased efficiency.
2061 
2062    Options Database Keys:
2063 +  -mat_aij_no_inode  - Do not use inodes
2064 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2065 -  -mat_aij_oneindex - Internally use indexing starting at 1
2066         rather than 0.  Note that when calling MatSetValues(),
2067         the user still MUST index entries starting at 0!
2068 
2069    Example usage:
2070 
2071    Consider the following 8x8 matrix with 34 non-zero values, that is
2072    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2073    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2074    as follows:
2075 
2076 .vb
2077             1  2  0  |  0  3  0  |  0  4
2078     Proc0   0  5  6  |  7  0  0  |  8  0
2079             9  0 10  | 11  0  0  | 12  0
2080     -------------------------------------
2081            13  0 14  | 15 16 17  |  0  0
2082     Proc1   0 18  0  | 19 20 21  |  0  0
2083             0  0  0  | 22 23  0  | 24  0
2084     -------------------------------------
2085     Proc2  25 26 27  |  0  0 28  | 29  0
2086            30  0  0  | 31 32 33  |  0 34
2087 .ve
2088 
2089    This can be represented as a collection of submatrices as:
2090 
2091 .vb
2092       A B C
2093       D E F
2094       G H I
2095 .ve
2096 
2097    Where the submatrices A,B,C are owned by proc0, D,E,F are
2098    owned by proc1, G,H,I are owned by proc2.
2099 
2100    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2101    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2102    The 'M','N' parameters are 8,8, and have the same values on all procs.
2103 
2104    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2105    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2106    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2107    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2108    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2109    matrix, and [DF] as another SeqAIJ matrix.
2110 
2111    When d_nz, o_nz parameters are specified, d_nz storage elements are
2112    allocated for every row of the local diagonal submatrix, and o_nz
2113    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2114    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2115    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2116    In this case, the values of d_nz,o_nz are:
2117 .vb
2118      proc0 : d_nz = 2, o_nz = 2
2119      proc1 : d_nz = 3, o_nz = 2
2120      proc2 : d_nz = 1, o_nz = 4
2121 .ve
2122    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2123    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2124    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2125    34 values.
2126 
2127    When d_nnz, o_nnz parameters are specified, the storage is specified
2128    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2129    In the above case the values for d_nnz,o_nnz are:
2130 .vb
2131      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2132      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2133      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2134 .ve
2135    Here the space allocated is the sum of all the above values, i.e. 34, and
2136    hence the preallocation is perfect.
2137 
2138    Level: intermediate
2139 
2140 .keywords: matrix, aij, compressed row, sparse, parallel
2141 
2142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2143 @*/
2144 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2145 {
2146   Mat_MPIAIJ   *b;
2147   int          ierr,i;
2148   PetscTruth   flg2;
2149 
2150   PetscFunctionBegin;
2151   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2152   if (!flg2) PetscFunctionReturn(0);
2153   B->preallocated = PETSC_TRUE;
2154   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2155   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2156   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2157   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2158   if (d_nnz) {
2159     for (i=0; i<B->m; i++) {
2160       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2161     }
2162   }
2163   if (o_nnz) {
2164     for (i=0; i<B->m; i++) {
2165       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2166     }
2167   }
2168   b = (Mat_MPIAIJ*)B->data;
2169 
2170   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2171   PetscLogObjectParent(B,b->A);
2172   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2173   PetscLogObjectParent(B,b->B);
2174 
2175   PetscFunctionReturn(0);
2176 }
2177 
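/*
   A minimal calling sketch for the manual page above, using the proc0 values from the
   8x8 example (each process passes its own local sizes and nonzero counts):

       Mat A;
       int ierr;
       int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
       ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   Since the per-row arrays are given, the scalar d_nz and o_nz arguments (here 0) are ignored.
*/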
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatCreateMPIAIJ"
2180 /*@C
2181    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2182    (the default parallel PETSc format).  For good matrix assembly performance
2183    the user should preallocate the matrix storage by setting the parameters
2184    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2185    performance can be increased by more than a factor of 50.
2186 
2187    Collective on MPI_Comm
2188 
2189    Input Parameters:
2190 +  comm - MPI communicator
2191 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2192            This value should be the same as the local size used in creating the
2193            y vector for the matrix-vector product y = Ax.
2194 .  n - This value should be the same as the local size used in creating the
2195        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2196        calculated if N is given) For square matrices n is almost always m.
2197 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2198 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2199 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2200            (same value is used for all local rows)
2201 .  d_nnz - array containing the number of nonzeros in the various rows of the
2202            DIAGONAL portion of the local submatrix (possibly different for each row)
2203            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2204            The size of this array is equal to the number of local rows, i.e. 'm'.
2205            You must leave room for the diagonal entry even if it is zero.
2206 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2207            submatrix (same value is used for all local rows).
2208 -  o_nnz - array containing the number of nonzeros in the various rows of the
2209            OFF-DIAGONAL portion of the local submatrix (possibly different for
2210            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2211            structure. The size of this array is equal to the number
2212            of local rows, i.e. 'm'.
2213 
2214    Output Parameter:
2215 .  A - the matrix
2216 
2217    Notes:
2218    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2219    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2220    storage requirements for this matrix.
2221 
2222    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2223    processor then it must be used on all processors that share the object for
2224    that argument.
2225 
2226    The AIJ format (also called the Yale sparse matrix format or
2227    compressed row storage) is fully compatible with standard Fortran 77
2228    storage.  That is, the stored row and column indices can begin at
2229    either one (as in Fortran) or zero.  See the users manual for details.
2230 
2231    The user MUST specify either the local or global matrix dimensions
2232    (possibly both).
2233 
2234    The parallel matrix is partitioned such that the first m0 rows belong to
2235    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2236    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2237 
2238    The DIAGONAL portion of the local submatrix of a processor can be defined
2239    as the submatrix which is obtained by extracting the part corresponding
2240    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2241    first row that belongs to the processor, and r2 is the last row belonging
2242    to this processor. This is a square mxm matrix. The remaining portion
2243    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2244 
2245    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2246 
2247    By default, this format uses inodes (identical nodes) when possible.
2248    We search for consecutive rows with the same nonzero structure, thereby
2249    reusing matrix information to achieve increased efficiency.
2250 
2251    Options Database Keys:
2252 +  -mat_aij_no_inode  - Do not use inodes
2253 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2254 -  -mat_aij_oneindex - Internally use indexing starting at 1
2255         rather than 0.  Note that when calling MatSetValues(),
2256         the user still MUST index entries starting at 0!
2257 
2258 
2259    Example usage:
2260 
2261    Consider the following 8x8 matrix with 34 non-zero values, that is
2262    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2263    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2264    as follows:
2265 
2266 .vb
2267             1  2  0  |  0  3  0  |  0  4
2268     Proc0   0  5  6  |  7  0  0  |  8  0
2269             9  0 10  | 11  0  0  | 12  0
2270     -------------------------------------
2271            13  0 14  | 15 16 17  |  0  0
2272     Proc1   0 18  0  | 19 20 21  |  0  0
2273             0  0  0  | 22 23  0  | 24  0
2274     -------------------------------------
2275     Proc2  25 26 27  |  0  0 28  | 29  0
2276            30  0  0  | 31 32 33  |  0 34
2277 .ve
2278 
2279    This can be represented as a collection of submatrices as:
2280 
2281 .vb
2282       A B C
2283       D E F
2284       G H I
2285 .ve
2286 
2287    Where the submatrices A,B,C are owned by proc0, D,E,F are
2288    owned by proc1, G,H,I are owned by proc2.
2289 
2290    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2291    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2292    The 'M','N' parameters are 8,8, and have the same values on all procs.
2293 
2294    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2295    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2296    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2297    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2298    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2299    matrix, and [DF] as another SeqAIJ matrix.
2300 
2301    When d_nz, o_nz parameters are specified, d_nz storage elements are
2302    allocated for every row of the local diagonal submatrix, and o_nz
2303    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2304    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2305    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2306    In this case, the values of d_nz,o_nz are:
2307 .vb
2308      proc0 : d_nz = 2, o_nz = 2
2309      proc1 : d_nz = 3, o_nz = 2
2310      proc2 : d_nz = 1, o_nz = 4
2311 .ve
2312    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2313    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2314    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2315    34 values.
2316 
2317    When d_nnz, o_nnz parameters are specified, the storage is specified
2318    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2319    In the above case the values for d_nnz,o_nnz are:
2320 .vb
2321      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2322      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2323      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2324 .ve
2325    Here the space allocated is the sum of all the above values, i.e. 34, and
2326    hence the preallocation is perfect.
2327 
2328    Level: intermediate
2329 
2330 .keywords: matrix, aij, compressed row, sparse, parallel
2331 
2332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2333 @*/
2334 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2335 {
2336   int ierr,size;
2337 
2338   PetscFunctionBegin;
2339   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2340   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2341   if (size > 1) {
2342     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2343     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2344   } else {
2345     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2346     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
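/*
   A minimal calling sketch for the manual page above: for the 8x8 example, process 0
   (owning rows 0-2) would call

       Mat A;
       int ierr;
       int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);

   with processes 1 and 2 supplying their own local sizes and d_nnz/o_nnz arrays from the
   example.  On a single process this routine creates a SeqAIJ matrix instead.
*/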
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2353 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2354 {
2355   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2356   PetscFunctionBegin;
2357   *Ad     = a->A;
2358   *Ao     = a->B;
2359   *colmap = a->garray;
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
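/*
   A minimal usage sketch for the accessor above: it exposes the diagonal block (a->A), the
   off-diagonal block (a->B) and the garray column map without copying:

       Mat Ad,Ao;
       int *garray,ierr;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   garray[j] is the global column number of local column j of Ao, the same mapping used by
   MatSetColoring_MPIAIJ() below when it translates off-diagonal colors.
*/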
2364 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2365 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2366 {
2367   int        ierr;
2368   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2369 
2370   PetscFunctionBegin;
2371   if (coloring->ctype == IS_COLORING_LOCAL) {
2372     int        *allcolors,*colors,i;
2373     ISColoring ocoloring;
2374 
2375     /* set coloring for diagonal portion */
2376     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2377 
2378     /* set coloring for off-diagonal portion */
2379     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2380     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2381     for (i=0; i<a->B->n; i++) {
2382       colors[i] = allcolors[a->garray[i]];
2383     }
2384     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2385     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2386     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2387     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2388   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2389     int        *colors,i,*larray;
2390     ISColoring ocoloring;
2391 
2392     /* set coloring for diagonal portion */
2393     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2394     for (i=0; i<a->A->n; i++) {
2395       larray[i] = i + a->cstart;
2396     }
2397     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2398     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2399     for (i=0; i<a->A->n; i++) {
2400       colors[i] = coloring->colors[larray[i]];
2401     }
2402     ierr = PetscFree(larray);CHKERRQ(ierr);
2403     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2404     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2405     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2406 
2407     /* set coloring for off-diagonal portion */
2408     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2409     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2410     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2411     for (i=0; i<a->B->n; i++) {
2412       colors[i] = coloring->colors[larray[i]];
2413     }
2414     ierr = PetscFree(larray);CHKERRQ(ierr);
2415     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2416     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2417     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2418   } else {
2419     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2420   }
2421 
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2427 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2428 {
2429   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2430   int        ierr;
2431 
2432   PetscFunctionBegin;
2433   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2434   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 #undef __FUNCT__
2439 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2440 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2441 {
2442   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2443   int        ierr;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2447   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2448   PetscFunctionReturn(0);
2449 }
2450