/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each
processor has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
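
/*
   A minimal lookup sketch (gc and loc are hypothetical names): once the
   colmap exists, a global column number gc is translated to a local
   column in the off-diagonal block B as below; loc < 0 means gc does not
   appear in B.  The +1/-1 offsets let 0 double as "not found" in both
   variants.

     int loc;
#if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gc+1,&loc);CHKERRQ(ierr);
     loc--;
#else
     loc = aij->colmap[gc] - 1;
#endif
*/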

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else             low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
           ierr = PetscFree(a->i);CHKERRQ(ierr); \
           ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
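
/*
   A small worked example of the insertion above: suppose a row currently
   holds the (shifted) columns {1,4,7} with nrow = 3 and rmax = 4.
   Setting column 5 finds no match, the scan stops at _i = 2 since
   rp[2] = 7 > 5, the entry 7 is shifted up one slot, and the row becomes
   {1,4,5,7} with nrow = 4.  Only when nrow would exceed rmax is the whole
   matrix reallocated with CHUNKSIZE extra slots for this row, which is
   why accurate preallocation avoids the expensive branch entirely.
*/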

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else             low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
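
/*
   A minimal calling sketch (mat is assumed to be a created MATMPIAIJ
   matrix): values in locally owned rows go directly into the diagonal
   block A or off-diagonal block B above; rows owned elsewhere are
   stashed and communicated during assembly.

     int         row = 0,col = 0;    (global indices)
     PetscScalar v   = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/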

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr   = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  ierr   = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use seperate
532      code so that MatSetValues() is not called for each diagonal allocating
533      new memory, thus calling lots of mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
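
/*
   Usage sketch (idx and one are hypothetical names): each process may
   list any global rows, owned or not; the routine above routes each
   index to its owner before zeroing.  A non-NULL diag places that value
   on the diagonal of every zeroed row.

     IS          rows;
     int         idx[2] = {0,5};
     PetscScalar one    = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,idx,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/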

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
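
/*
   The multiply above computes yy = A_diag*xx_local + B_off*xx_ghost:
   the scatter gathering the ghost values into a->lvec is started first,
   the purely local product with the diagonal block proceeds while the
   messages are in flight, and the off-diagonal contribution is added
   once the scatter completes.  A caller simply uses MatMult(A,xx,yy)
   on the parallel matrix, which dispatches here through the ops table.
*/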

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this implementation */
  /* but perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is
  the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors.
*/
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  Mat          AA = mat->A,BB = mat->B;
  Mat_SeqAIJ   *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ*)BB->data;
  PetscScalar  *b,*x,*xs,*ls,d,*v,sum;
  int          ierr,*idx,*diag;
  int          n = matin->n,m = matin->m,i,shift = A->indexshift;

  PetscFunctionBegin;
  if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      /* go down through the rows */
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}
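
/*
   Calling sketch: the local SOR variants above are reached through the
   ops table, e.g. one symmetric local sweep with a zero initial guess:

     ierr = MatRelax(matin,bb,1.0,
                     SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
                     0.0,1,xx);CHKERRQ(ierr);

   Truly parallel sweeps are not supported, as the final branch reports.
*/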

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
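
/*
   Note on the reductions above: each norm combines local work with one
   collective operation, e.g. the Frobenius norm sums |a_ij|^2 over both
   local blocks and performs an MPI_SUM reduction before the square root,
   so a call such as

     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

   must be made by every process in the matrix communicator (nrm being a
   PetscReal supplied by the caller).
*/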

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
  int          ierr,shift = Aloc->indexshift;
  int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat          B;
  PetscScalar  *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
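
/*
   Usage sketch: MatDiagonalScale(mat,ll,rr) computes diag(ll)*mat*diag(rr);
   either vector may be PETSC_NULL to skip that side.  Right scaling of
   the off-diagonal block needs the ghosted copy of rr, which is why the
   scatter above is overlapped with scaling the diagonal block.
*/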


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatCopy_MPIAIJ"
1394 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1395 {
1396   int        ierr;
1397   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1398   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1399   PetscTruth flg;
1400 
1401   PetscFunctionBegin;
1402   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1403   if (str != SAME_NONZERO_PATTERN || !flg) {
1404     /* because of the column compression in the off-processor part of the matrix a->B,
1405        the number of columns in a->B and b->B may be different, hence we cannot call
1406        MatCopy() directly on the two parts. If need be, a copy more efficient than
1407        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1408        then copying the submatrices */
1409     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1410   } else {
1411     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1412     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1419 int MatSetUpPreallocation_MPIAIJ(Mat A)
1420 {
1421   int        ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1429 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1430 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1431 EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1432 EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1433 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1434 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1435 #endif
1436 
1437 /* -------------------------------------------------------------------*/
1438 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1439        MatGetRow_MPIAIJ,
1440        MatRestoreRow_MPIAIJ,
1441        MatMult_MPIAIJ,
1442        MatMultAdd_MPIAIJ,
1443        MatMultTranspose_MPIAIJ,
1444        MatMultTransposeAdd_MPIAIJ,
1445        0,
1446        0,
1447        0,
1448        0,
1449        0,
1450        0,
1451        MatRelax_MPIAIJ,
1452        MatTranspose_MPIAIJ,
1453        MatGetInfo_MPIAIJ,
1454        MatEqual_MPIAIJ,
1455        MatGetDiagonal_MPIAIJ,
1456        MatDiagonalScale_MPIAIJ,
1457        MatNorm_MPIAIJ,
1458        MatAssemblyBegin_MPIAIJ,
1459        MatAssemblyEnd_MPIAIJ,
1460        0,
1461        MatSetOption_MPIAIJ,
1462        MatZeroEntries_MPIAIJ,
1463        MatZeroRows_MPIAIJ,
1464 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1465        MatLUFactorSymbolic_MPIAIJ_TFS,
1466 #else
1467        0,
1468 #endif
1469        0,
1470        0,
1471        0,
1472        MatSetUpPreallocation_MPIAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477        0,
1478        MatDuplicate_MPIAIJ,
1479        0,
1480        0,
1481        0,
1482        0,
1483        0,
1484        MatGetSubMatrices_MPIAIJ,
1485        MatIncreaseOverlap_MPIAIJ,
1486        MatGetValues_MPIAIJ,
1487        MatCopy_MPIAIJ,
1488        MatPrintHelp_MPIAIJ,
1489        MatScale_MPIAIJ,
1490        0,
1491        0,
1492        0,
1493        MatGetBlockSize_MPIAIJ,
1494        0,
1495        0,
1496        0,
1497        0,
1498        MatFDColoringCreate_MPIAIJ,
1499        0,
1500        MatSetUnfactored_MPIAIJ,
1501        0,
1502        0,
1503        MatGetSubMatrix_MPIAIJ,
1504        MatDestroy_MPIAIJ,
1505        MatView_MPIAIJ,
1506        MatGetPetscMaps_Petsc,
1507        0,
1508        0,
1509        0,
1510        0,
1511        0,
1512        0,
1513        0,
1514        0,
1515        MatSetColoring_MPIAIJ,
1516        MatSetValuesAdic_MPIAIJ,
1517        MatSetValuesAdifor_MPIAIJ
1518 };
1519 
1520 /* ----------------------------------------------------------------------------------------*/
1521 
1522 EXTERN_C_BEGIN
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1525 int MatStoreValues_MPIAIJ(Mat mat)
1526 {
1527   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1528   int        ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1532   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1533   PetscFunctionReturn(0);
1534 }
1535 EXTERN_C_END
1536 
1537 EXTERN_C_BEGIN
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1540 int MatRetrieveValues_MPIAIJ(Mat mat)
1541 {
1542   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1543   int        ierr;
1544 
1545   PetscFunctionBegin;
1546   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1547   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 EXTERN_C_END
1551 
1552 #include "petscpc.h"
1553 EXTERN_C_BEGIN
1554 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1555 EXTERN_C_END
1556 
1557 EXTERN_C_BEGIN
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatCreate_MPIAIJ"
1560 int MatCreate_MPIAIJ(Mat B)
1561 {
1562   Mat_MPIAIJ   *b;
1563   int          ierr,i,size;
1564 
1565   PetscFunctionBegin;
1566   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1567 
1568   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1569   B->data         = (void*)b;
1570   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1571   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1572   B->factor       = 0;
1573   B->assembled    = PETSC_FALSE;
1574   B->mapping      = 0;
1575 
1576   B->insertmode      = NOT_SET_VALUES;
1577   b->size            = size;
1578   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1579 
1580   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1581   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1582 
1583   /* the information in the maps duplicates the information computed below; eventually
1584      we should keep only the maps and remove the redundant fields computed here */
1585   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1586   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1587 
1588   /* build local table of row and column ownerships */
1589   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1590   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1591   b->cowners = b->rowners + b->size + 2;
1592   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1593   b->rowners[0] = 0;
1594   for (i=2; i<=b->size; i++) {
1595     b->rowners[i] += b->rowners[i-1];
1596   }
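  /* rowners[] now holds the first global row of each process; e.g. with three
     processes owning 3,3,2 rows: rowners = {0,3,6,8} */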
1597   b->rstart = b->rowners[b->rank];
1598   b->rend   = b->rowners[b->rank+1];
1599   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1600   b->cowners[0] = 0;
1601   for (i=2; i<=b->size; i++) {
1602     b->cowners[i] += b->cowners[i-1];
1603   }
1604   b->cstart = b->cowners[b->rank];
1605   b->cend   = b->cowners[b->rank+1];
1606 
1607   /* build cache (stash) for matrix entries destined for other processors */
1608   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1609   b->donotstash  = PETSC_FALSE;
1610   b->colmap      = 0;
1611   b->garray      = 0;
1612   b->roworiented = PETSC_TRUE;
1613 
1614   /* stuff used for matrix vector multiply */
1615   b->lvec      = PETSC_NULL;
1616   b->Mvctx     = PETSC_NULL;
1617 
1618   /* stuff for MatGetRow() */
1619   b->rowindices   = 0;
1620   b->rowvalues    = 0;
1621   b->getrowactive = PETSC_FALSE;
1622 
1623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1624                                      "MatStoreValues_MPIAIJ",
1625                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1627                                      "MatRetrieveValues_MPIAIJ",
1628                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1630                                      "MatGetDiagonalBlock_MPIAIJ",
1631                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1638 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1639 {
1640   Mat        mat;
1641   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1642   int        ierr;
1643 
1644   PetscFunctionBegin;
1645   *newmat       = 0;
1646   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1647   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1648   a    = (Mat_MPIAIJ*)mat->data;
1649   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1650   mat->factor       = matin->factor;
1651   mat->assembled    = PETSC_TRUE;
1652   mat->insertmode   = NOT_SET_VALUES;
1653   mat->preallocated = PETSC_TRUE;
1654 
1655   a->rstart       = oldmat->rstart;
1656   a->rend         = oldmat->rend;
1657   a->cstart       = oldmat->cstart;
1658   a->cend         = oldmat->cend;
1659   a->size         = oldmat->size;
1660   a->rank         = oldmat->rank;
1661   a->donotstash   = oldmat->donotstash;
1662   a->roworiented  = oldmat->roworiented;
1663   a->rowindices   = 0;
1664   a->rowvalues    = 0;
1665   a->getrowactive = PETSC_FALSE;
1666 
1667   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1668   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1669   if (oldmat->colmap) {
1670 #if defined (PETSC_USE_CTABLE)
1671     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1672 #else
1673     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1674     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1675     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1676 #endif
1677   } else a->colmap = 0;
1678   if (oldmat->garray) {
1679     int len;
1680     len  = oldmat->B->n;
1681     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1682     PetscLogObjectMemory(mat,len*sizeof(int));
1683     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1684   } else a->garray = 0;
1685 
1686   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1687   PetscLogObjectParent(mat,a->lvec);
1688   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1689   PetscLogObjectParent(mat,a->Mvctx);
1690   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1691   PetscLogObjectParent(mat,a->A);
1692   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1693   PetscLogObjectParent(mat,a->B);
1694   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1695   *newmat = mat;
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #include "petscsys.h"
1700 
1701 EXTERN_C_BEGIN
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatLoad_MPIAIJ"
1704 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1705 {
1706   Mat          A;
1707   PetscScalar  *vals,*svals;
1708   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1709   MPI_Status   status;
1710   int          i,nz,ierr,j,rstart,rend,fd;
1711   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1712   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1713   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1714 
1715   PetscFunctionBegin;
1716   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1717   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1718   if (!rank) {
1719     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1720     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1721     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1722     if (header[3] < 0) {
1723       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1724     }
1725   }
1726 
1727   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1728   M = header[1]; N = header[2];
1729   /* determine ownership of all rows */
1730   m = M/size + ((M % size) > rank);
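  /* e.g. M=8 on 3 processes: ranks 0 and 1 get 3 rows each, rank 2 gets 2 */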
1731   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1732   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1733   rowners[0] = 0;
1734   for (i=2; i<=size; i++) {
1735     rowners[i] += rowners[i-1];
1736   }
1737   rstart = rowners[rank];
1738   rend   = rowners[rank+1];
1739 
1740   /* distribute row lengths to all processors */
1741   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1742   offlens = ourlens + (rend-rstart);
1743   if (!rank) {
1744     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1745     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1746     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1747     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1748     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1749     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1750   } else {
1751     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1752   }
1753 
1754   if (!rank) {
1755     /* calculate the number of nonzeros on each processor */
1756     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1757     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1758     for (i=0; i<size; i++) {
1759       for (j=rowners[i]; j< rowners[i+1]; j++) {
1760         procsnz[i] += rowlengths[j];
1761       }
1762     }
1763     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1764 
1765     /* determine max buffer needed and allocate it */
1766     maxnz = 0;
1767     for (i=0; i<size; i++) {
1768       maxnz = PetscMax(maxnz,procsnz[i]);
1769     }
1770     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1771 
1772     /* read in my part of the matrix column indices  */
1773     nz   = procsnz[0];
1774     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1775     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1776 
1777     /* read in everyone else's part and ship it off */
1778     for (i=1; i<size; i++) {
1779       nz   = procsnz[i];
1780       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1781       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1782     }
1783     ierr = PetscFree(cols);CHKERRQ(ierr);
1784   } else {
1785     /* determine buffer space needed for message */
1786     nz = 0;
1787     for (i=0; i<m; i++) {
1788       nz += ourlens[i];
1789     }
1790     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1791 
1792     /* receive message of column indices */
1793     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1794     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1795     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1796   }
1797 
1798   /* determine column ownership if matrix is not square */
1799   if (N != M) {
1800     n      = N/size + ((N % size) > rank);
1801     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1802     cstart = cend - n;
1803   } else {
1804     cstart = rstart;
1805     cend   = rend;
1806     n      = cend - cstart;
1807   }
1808 
1809   /* loop over local rows, determining number of off-diagonal entries */
1810   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1811   jj = 0;
1812   for (i=0; i<m; i++) {
1813     for (j=0; j<ourlens[i]; j++) {
1814       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1815       jj++;
1816     }
1817   }
1818 
1819   /* create our matrix */
1820   for (i=0; i<m; i++) {
1821     ourlens[i] -= offlens[i];
1822   }
1823   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1824   A = *newmat;
1825   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1826   for (i=0; i<m; i++) {
1827     ourlens[i] += offlens[i];
1828   }
1829 
1830   if (!rank) {
1831     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1832 
1833     /* read in my part of the matrix numerical values  */
1834     nz   = procsnz[0];
1835     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1836 
1837     /* insert into matrix */
1838     jj      = rstart;
1839     smycols = mycols;
1840     svals   = vals;
1841     for (i=0; i<m; i++) {
1842       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1843       smycols += ourlens[i];
1844       svals   += ourlens[i];
1845       jj++;
1846     }
1847 
1848     /* read in the other processors' parts and ship them out */
1849     for (i=1; i<size; i++) {
1850       nz   = procsnz[i];
1851       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1852       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1853     }
1854     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1855   } else {
1856     /* receive numeric values */
1857     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1858 
1859     /* receive message of values */
1860     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1861     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1862     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1863 
1864     /* insert into matrix */
1865     jj      = rstart;
1866     smycols = mycols;
1867     svals   = vals;
1868     for (i=0; i<m; i++) {
1869       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1870       smycols += ourlens[i];
1871       svals   += ourlens[i];
1872       jj++;
1873     }
1874   }
1875   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1876   ierr = PetscFree(vals);CHKERRQ(ierr);
1877   ierr = PetscFree(mycols);CHKERRQ(ierr);
1878   ierr = PetscFree(rowners);CHKERRQ(ierr);
1879 
1880   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1882   PetscFunctionReturn(0);
1883 }
1884 EXTERN_C_END
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1888 /*
1889     Not great since it makes two copies of the submatrix: first a SeqAIJ
1890   submatrix on each processor, and then the end result formed by concatenating
1891   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1892 */
1893 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1894 {
1895   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1896   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1897   Mat          *local,M,Mreuse;
1898   PetscScalar  *vwork,*aa;
1899   MPI_Comm     comm = mat->comm;
1900   Mat_SeqAIJ   *aij;
1901 
1902 
1903   PetscFunctionBegin;
1904   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1905   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1906 
1907   if (call ==  MAT_REUSE_MATRIX) {
1908     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1909     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1910     local = &Mreuse;
1911     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1912   } else {
1913     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1914     Mreuse = *local;
1915     ierr   = PetscFree(local);CHKERRQ(ierr);
1916   }
1917 
1918   /*
1919       m - number of local rows
1920       n - number of columns (same on all processors)
1921       rstart - first row in new global matrix generated
1922   */
1923   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1924   if (call == MAT_INITIAL_MATRIX) {
1925     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1926     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1927     ii  = aij->i;
1928     jj  = aij->j;
1929 
1930     /*
1931         Determine the number of non-zeros in the diagonal and off-diagonal
1932         portions of the matrix in order to do correct preallocation
1933     */
1934 
1935     /* first get start and end of "diagonal" columns */
1936     if (csize == PETSC_DECIDE) {
1937       nlocal = n/size + ((n % size) > rank);
1938     } else {
1939       nlocal = csize;
1940     }
1941     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1942     rstart = rend - nlocal;
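    /* e.g. nlocal = 3,3,2 on 3 processes: MPI_Scan gives rend = 3,6,8 and
       hence rstart = 0,3,6 */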
1943     if (rank == size - 1 && rend != n) {
1944       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1945     }
1946 
1947     /* next, compute all the lengths */
1948     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1949     olens = dlens + m;
1950     for (i=0; i<m; i++) {
1951       jend = ii[i+1] - ii[i];
1952       olen = 0;
1953       dlen = 0;
1954       for (j=0; j<jend; j++) {
1955         if (*jj < rstart || *jj >= rend) olen++;
1956         else dlen++;
1957         jj++;
1958       }
1959       olens[i] = olen;
1960       dlens[i] = dlen;
1961     }
1962     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1963     ierr = PetscFree(dlens);CHKERRQ(ierr);
1964   } else {
1965     int ml,nl;
1966 
1967     M = *newmat;
1968     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1969     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1970     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1971     /*
1972          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1973        rather than the slower MatSetValues().
1974     */
1975     M->was_assembled = PETSC_TRUE;
1976     M->assembled     = PETSC_FALSE;
1977   }
1978   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1979   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1980   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1981   ii  = aij->i;
1982   jj  = aij->j;
1983   aa  = aij->a;
1984   for (i=0; i<m; i++) {
1985     row   = rstart + i;
1986     nz    = ii[i+1] - ii[i];
1987     cwork = jj;     jj += nz;
1988     vwork = aa;     aa += nz;
1989     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1990   }
1991 
1992   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   *newmat = M;
1995 
1996   /* save the submatrix used on this processor for the next request */
1997   if (call ==  MAT_INITIAL_MATRIX) {
1998     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1999     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2000   }
2001 
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2007 /*@C
2008    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2009    (the default parallel PETSc format).  For good matrix assembly performance
2010    the user should preallocate the matrix storage by setting the parameters
2011    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2012    performance can be increased by more than a factor of 50.
2013 
2014    Collective on MPI_Comm
2015 
2016    Input Parameters:
2017 +  A - the matrix
2018 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2019            (same value is used for all local rows)
2020 .  d_nnz - array containing the number of nonzeros in the various rows of the
2021            DIAGONAL portion of the local submatrix (possibly different for each row)
2022            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2023            The size of this array is equal to the number of local rows, i.e. 'm'.
2024            You must leave room for the diagonal entry even if it is zero.
2025 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2026            submatrix (same value is used for all local rows).
2027 -  o_nnz - array containing the number of nonzeros in the various rows of the
2028            OFF-DIAGONAL portion of the local submatrix (possibly different for
2029            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2030            structure. The size of this array is equal to the number
2031            of local rows, i.e. 'm'.
2032 
2033    The AIJ format (also called the Yale sparse matrix format or
2034    compressed row storage) is fully compatible with standard Fortran 77
2035    storage.  That is, the stored row and column indices can begin at
2036    either one (as in Fortran) or zero.  See the users manual for details.
2037 
2038    The user MUST specify either the local or global matrix dimensions
2039    (possibly both).
2040 
2041    The parallel matrix is partitioned such that the first m0 rows belong to
2042    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2043    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2044 
2045    The DIAGONAL portion of the local submatrix of a processor can be defined
2046    as the submatrix which is obtained by extracting the part corresponding
2047    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2048    first row that belongs to the processor, and r2 is the last row belonging
2049    to this processor. This is a square mxm matrix. The remaining portion
2050    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2051 
2052    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2053 
2054    By default, this format uses inodes (identical nodes) when possible.
2055    We search for consecutive rows with the same nonzero structure, thereby
2056    reusing matrix information to achieve increased efficiency.
2057 
2058    Options Database Keys:
2059 +  -mat_aij_no_inode  - Do not use inodes
2060 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2061 -  -mat_aij_oneindex - Internally use indexing starting at 1
2062         rather than 0.  Note that when calling MatSetValues(),
2063         the user still MUST index entries starting at 0!
2064 
2065    Example usage:
2066 
2067    Consider the following 8x8 matrix with 34 nonzero values that is
2068    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2069    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2070    as follows:
2071 
2072 .vb
2073             1  2  0  |  0  3  0  |  0  4
2074     Proc0   0  5  6  |  7  0  0  |  8  0
2075             9  0 10  | 11  0  0  | 12  0
2076     -------------------------------------
2077            13  0 14  | 15 16 17  |  0  0
2078     Proc1   0 18  0  | 19 20 21  |  0  0
2079             0  0  0  | 22 23  0  | 24  0
2080     -------------------------------------
2081     Proc2  25 26 27  |  0  0 28  | 29  0
2082            30  0  0  | 31 32 33  |  0 34
2083 .ve
2084 
2085    This can be represented as a collection of submatrices as:
2086 
2087 .vb
2088       A B C
2089       D E F
2090       G H I
2091 .ve
2092 
2093    Where the submatrices A,B,C are owned by proc0, D,E,F are
2094    owned by proc1, G,H,I are owned by proc2.
2095 
2096    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2097    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2098    The 'M','N' parameters are 8,8, and have the same values on all procs.
2099 
2100    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2101    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2102    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2103    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2104    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2105    matrix and [DF] as another SeqAIJ matrix.
2106 
2107    When d_nz, o_nz parameters are specified, d_nz storage elements are
2108    allocated for every row of the local diagonal submatrix, and o_nz
2109    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2110    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2111    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2112    In this case, the values of d_nz,o_nz are:
2113 .vb
2114      proc0 : d_nz = 2, o_nz = 2
2115      proc1 : d_nz = 3, o_nz = 2
2116      proc2 : d_nz = 1, o_nz = 4
2117 .ve
2118    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2119    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2120    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2121    34 values.
2122 
2123    When d_nnz, o_nnz parameters are specified, the storage is specified
2124    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2125    In the above case the values for d_nnz,o_nnz are:
2126 .vb
2127      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2128      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2129      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2130 .ve
2131    Here the space allocated is the sum of all the above values, i.e., 34,
2132    and hence the preallocation is perfect.
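
   A minimal calling sketch using the d_nnz/o_nnz arrays above (names are
   illustrative; error checking omitted):
.vb
     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);
     ierr = MatSetType(A,MATMPIAIJ);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve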
2133 
2134    Level: intermediate
2135 
2136 .keywords: matrix, aij, compressed row, sparse, parallel
2137 
2138 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2139 @*/
2140 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2141 {
2142   Mat_MPIAIJ   *b;
2143   int          ierr,i;
2144   PetscTruth   flg2;
2145 
2146   PetscFunctionBegin;
2147   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2148   if (!flg2) PetscFunctionReturn(0);
2149   B->preallocated = PETSC_TRUE;
2150   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2151   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2152   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2153   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2154   if (d_nnz) {
2155     for (i=0; i<B->m; i++) {
2156       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2157     }
2158   }
2159   if (o_nnz) {
2160     for (i=0; i<B->m; i++) {
2161       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2162     }
2163   }
2164   b = (Mat_MPIAIJ*)B->data;
2165 
2166   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2167   PetscLogObjectParent(B,b->A);
2168   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2169   PetscLogObjectParent(B,b->B);
2170 
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatCreateMPIAIJ"
2176 /*@C
2177    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2178    (the default parallel PETSc format).  For good matrix assembly performance
2179    the user should preallocate the matrix storage by setting the parameters
2180    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2181    performance can be increased by more than a factor of 50.
2182 
2183    Collective on MPI_Comm
2184 
2185    Input Parameters:
2186 +  comm - MPI communicator
2187 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2188            This value should be the same as the local size used in creating the
2189            y vector for the matrix-vector product y = Ax.
2190 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2191        This value should be the same as the local size used in creating the
2192        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2193 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2194 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2195 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2196            (same value is used for all local rows)
2197 .  d_nnz - array containing the number of nonzeros in the various rows of the
2198            DIAGONAL portion of the local submatrix (possibly different for each row)
2199            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2200            The size of this array is equal to the number of local rows, i.e. 'm'.
2201            You must leave room for the diagonal entry even if it is zero.
2202 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2203            submatrix (same value is used for all local rows).
2204 -  o_nnz - array containing the number of nonzeros in the various rows of the
2205            OFF-DIAGONAL portion of the local submatrix (possibly different for
2206            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2207            structure. The size of this array is equal to the number
2208            of local rows, i.e. 'm'.
2209 
2210    Output Parameter:
2211 .  A - the matrix
2212 
2213    Notes:
2214    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2215    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2216    storage requirements for this matrix.
2217 
2218    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2219    processor then it must be used on all processors that share the object for
2220    that argument.
2221 
2222    The AIJ format (also called the Yale sparse matrix format or
2223    compressed row storage) is fully compatible with standard Fortran 77
2224    storage.  That is, the stored row and column indices can begin at
2225    either one (as in Fortran) or zero.  See the users manual for details.
2226 
2227    The user MUST specify either the local or global matrix dimensions
2228    (possibly both).
2229 
2230    The parallel matrix is partitioned such that the first m0 rows belong to
2231    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2232    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2233 
2234    The DIAGONAL portion of the local submatrix of a processor can be defined
2235    as the submatrix which is obtained by extracting the part corresponding
2236    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2237    first row that belongs to the processor, and r2 is the last row belonging
2238    to this processor. This is a square mxm matrix. The remaining portion
2239    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2240 
2241    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2242 
2243    By default, this format uses inodes (identical nodes) when possible.
2244    We search for consecutive rows with the same nonzero structure, thereby
2245    reusing matrix information to achieve increased efficiency.
2246 
2247    Options Database Keys:
2248 +  -mat_aij_no_inode  - Do not use inodes
2249 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2250 -  -mat_aij_oneindex - Internally use indexing starting at 1
2251         rather than 0.  Note that when calling MatSetValues(),
2252         the user still MUST index entries starting at 0!
2253 
2254 
2255    Example usage:
2256 
2257    Consider the following 8x8 matrix with 34 nonzero values that is
2258    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2259    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2260    as follows:
2261 
2262 .vb
2263             1  2  0  |  0  3  0  |  0  4
2264     Proc0   0  5  6  |  7  0  0  |  8  0
2265             9  0 10  | 11  0  0  | 12  0
2266     -------------------------------------
2267            13  0 14  | 15 16 17  |  0  0
2268     Proc1   0 18  0  | 19 20 21  |  0  0
2269             0  0  0  | 22 23  0  | 24  0
2270     -------------------------------------
2271     Proc2  25 26 27  |  0  0 28  | 29  0
2272            30  0  0  | 31 32 33  |  0 34
2273 .ve
2274 
2275    This can be represented as a collection of submatrices as:
2276 
2277 .vb
2278       A B C
2279       D E F
2280       G H I
2281 .ve
2282 
2283    Where the submatrices A,B,C are owned by proc0, D,E,F are
2284    owned by proc1, G,H,I are owned by proc2.
2285 
2286    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2287    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2288    The 'M','N' parameters are 8,8, and have the same values on all procs.
2289 
2290    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2291    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2292    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2293    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2294    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2295    matrix and [DF] as another SeqAIJ matrix.
2296 
2297    When d_nz, o_nz parameters are specified, d_nz storage elements are
2298    allocated for every row of the local diagonal submatrix, and o_nz
2299    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2300    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2301    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2302    In this case, the values of d_nz,o_nz are:
2303 .vb
2304      proc0 : d_nz = 2, o_nz = 2
2305      proc1 : d_nz = 3, o_nz = 2
2306      proc2 : d_nz = 1, o_nz = 4
2307 .ve
2308    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2309    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2310    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2311    34 values.
2312 
2313    When d_nnz, o_nnz parameters are specified, the storage is specified
2314    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2315    In the above case the values for d_nnz,o_nnz are:
2316 .vb
2317      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2318      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2319      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2320 .ve
2321    Here the space allocated is the sum of all the above values, i.e., 34,
2322    and hence the preallocation is perfect.
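
   A minimal calling sketch for the example above (names are illustrative;
   error checking omitted):
.vb
     Mat A;
     ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);
     /* ... insert entries with MatSetValues() ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve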
2323 
2324    Level: intermediate
2325 
2326 .keywords: matrix, aij, compressed row, sparse, parallel
2327 
2328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2329 @*/
2330 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2331 {
2332   int ierr,size;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2336   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
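  /* on one process there is no off-diagonal block, so a plain SeqAIJ matrix
     is created instead of an MPIAIJ matrix */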
2337   if (size > 1) {
2338     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2339     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2340   } else {
2341     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2342     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2343   }
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2349 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2350 {
2351   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2352   PetscFunctionBegin;
2353   *Ad     = a->A;
2354   *Ao     = a->B;
2355   *colmap = a->garray;
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2361 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2362 {
2363   int        ierr;
2364   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2365 
2366   PetscFunctionBegin;
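  /* IS_COLORING_LOCAL supplies colors only for the locally owned columns, so
     the colors of the ghost (off-processor) columns must be gathered from all
     processes; IS_COLORING_GHOSTED already supplies colors in the local
     (ghosted) numbering defined by A->mapping */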
2367   if (coloring->ctype == IS_COLORING_LOCAL) {
2368     int        *allcolors,*colors,i;
2369     ISColoring ocoloring;
2370 
2371     /* set coloring for diagonal portion */
2372     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2373 
2374     /* set coloring for off-diagonal portion */
2375     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2376     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2377     for (i=0; i<a->B->n; i++) {
2378       colors[i] = allcolors[a->garray[i]];
2379     }
2380     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2381     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2382     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2383     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2384   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2385     int        *colors,i,*larray;
2386     ISColoring ocoloring;
2387 
2388     /* set coloring for diagonal portion */
2389     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2390     for (i=0; i<a->A->n; i++) {
2391       larray[i] = i + a->cstart;
2392     }
2393     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2394     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2395     for (i=0; i<a->A->n; i++) {
2396       colors[i] = coloring->colors[larray[i]];
2397     }
2398     ierr = PetscFree(larray);CHKERRQ(ierr);
2399     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2400     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2401     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2402 
2403     /* set coloring for off-diagonal portion */
2404     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2405     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2406     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2407     for (i=0; i<a->B->n; i++) {
2408       colors[i] = coloring->colors[larray[i]];
2409     }
2410     ierr = PetscFree(larray);CHKERRQ(ierr);
2411     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2412     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2413     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2414   } else {
2415     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2416   }
2417 
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2423 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2424 {
2425   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2426   int        ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2430   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2436 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2437 {
2438   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2439   int        ierr;
2440 
2441   PetscFunctionBegin;
2442   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2443   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446