xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d643ce63cf9b1fe2cd4ab306b8154e7105d3fb3c)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it it is not scalable (each processor
19 has an order N integer array but is fast to acess.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some Variables required in the macro */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled, if so we must
381      also disassemble ourselfs, in order that we may reassemble. */
382   /*
383      if nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled thus we can skip this stuff
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use seperate
532      code so that MatSetValues() is not called for each diagonal allocating
533      new memory, thus calling lots of mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for my implementation*/
621   /* but is not perhaps always true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in yy until the next line, which is true for my implementation*/
642   /* but is not perhaps always true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     PetscScalar *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Everyone has to call to draw the matrix since the graphics waits are
798        synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 /*
832     This has to provide several versions.
833 
834      2) a) use only local smoothing updating outer values only once.
835         b) local smoothing updating outer values each inner iteration
836      3) color updating out values betwen colors.
837 */
838 #undef __FUNCT__
839 #define __FUNCT__ "MatRelax_MPIAIJ"
840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
841 {
842   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
843   Mat          AA = mat->A,BB = mat->B;
844   Mat_SeqAIJ   *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
845   PetscScalar  *b,*x,*xs,*ls,d,*v,sum;
846   int          ierr,*idx,*diag;
847   int          n = matin->n,m = matin->m,i,shift = A->indexshift;
848 
849   PetscFunctionBegin;
850   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
851   diag = A->diag;
852   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
853     if (flag & SOR_ZERO_INITIAL_GUESS) {
854       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
855       PetscFunctionReturn(0);
856     }
857     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
858     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
859     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
860     if (xx != bb) {
861       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
862     } else {
863       b = x;
864     }
865     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
866     xs = x + shift; /* shift by one for index start of 1 */
867     ls = ls + shift;
868     while (its--) {
869       /* go down through the rows */
870       for (i=0; i<m; i++) {
871         n    = A->i[i+1] - A->i[i];
872 	PetscLogFlops(4*n+3);
873         idx  = A->j + A->i[i] + shift;
874         v    = A->a + A->i[i] + shift;
875         sum  = b[i];
876         SPARSEDENSEMDOT(sum,xs,v,idx,n);
877         d    = fshift + A->a[diag[i]+shift];
878         n    = B->i[i+1] - B->i[i];
879         idx  = B->j + B->i[i] + shift;
880         v    = B->a + B->i[i] + shift;
881         SPARSEDENSEMDOT(sum,ls,v,idx,n);
882         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
883       }
884       /* come up through the rows */
885       for (i=m-1; i>-1; i--) {
886         n    = A->i[i+1] - A->i[i];
887 	PetscLogFlops(4*n+3);
888         idx  = A->j + A->i[i] + shift;
889         v    = A->a + A->i[i] + shift;
890         sum  = b[i];
891         SPARSEDENSEMDOT(sum,xs,v,idx,n);
892         d    = fshift + A->a[diag[i]+shift];
893         n    = B->i[i+1] - B->i[i];
894         idx  = B->j + B->i[i] + shift;
895         v    = B->a + B->i[i] + shift;
896         SPARSEDENSEMDOT(sum,ls,v,idx,n);
897         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
898       }
899     }
900     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
902     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
903   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
904     if (flag & SOR_ZERO_INITIAL_GUESS) {
905       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
906       PetscFunctionReturn(0);
907     }
908     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
909     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
910     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
911     if (xx != bb) {
912       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
913     } else {
914       b = x;
915     }
916     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
917     xs = x + shift; /* shift by one for index start of 1 */
918     ls = ls + shift;
919     while (its--) {
920       for (i=0; i<m; i++) {
921         n    = A->i[i+1] - A->i[i];
922 	PetscLogFlops(4*n+3);
923         idx  = A->j + A->i[i] + shift;
924         v    = A->a + A->i[i] + shift;
925         sum  = b[i];
926         SPARSEDENSEMDOT(sum,xs,v,idx,n);
927         d    = fshift + A->a[diag[i]+shift];
928         n    = B->i[i+1] - B->i[i];
929         idx  = B->j + B->i[i] + shift;
930         v    = B->a + B->i[i] + shift;
931         SPARSEDENSEMDOT(sum,ls,v,idx,n);
932         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
933       }
934     }
935     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
936     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
937     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
938   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
939     if (flag & SOR_ZERO_INITIAL_GUESS) {
940       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
941       PetscFunctionReturn(0);
942     }
943     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
944     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
945     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
946     if (xx != bb) {
947       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
948     } else {
949       b = x;
950     }
951     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
952     xs = x + shift; /* shift by one for index start of 1 */
953     ls = ls + shift;
954     while (its--) {
955       for (i=m-1; i>-1; i--) {
956         n    = A->i[i+1] - A->i[i];
957 	PetscLogFlops(4*n+3);
958         idx  = A->j + A->i[i] + shift;
959         v    = A->a + A->i[i] + shift;
960         sum  = b[i];
961         SPARSEDENSEMDOT(sum,xs,v,idx,n);
962         d    = fshift + A->a[diag[i]+shift];
963         n    = B->i[i+1] - B->i[i];
964         idx  = B->j + B->i[i] + shift;
965         v    = B->a + B->i[i] + shift;
966         SPARSEDENSEMDOT(sum,ls,v,idx,n);
967         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
968       }
969     }
970     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
972     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
973   } else {
974     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatGetInfo_MPIAIJ"
981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
982 {
983   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
984   Mat        A = mat->A,B = mat->B;
985   int        ierr;
986   PetscReal  isend[5],irecv[5];
987 
988   PetscFunctionBegin;
989   info->block_size     = 1.0;
990   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
991   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
992   isend[3] = info->memory;  isend[4] = info->mallocs;
993   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
994   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
995   isend[3] += info->memory;  isend[4] += info->mallocs;
996   if (flag == MAT_LOCAL) {
997     info->nz_used      = isend[0];
998     info->nz_allocated = isend[1];
999     info->nz_unneeded  = isend[2];
1000     info->memory       = isend[3];
1001     info->mallocs      = isend[4];
1002   } else if (flag == MAT_GLOBAL_MAX) {
1003     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1004     info->nz_used      = irecv[0];
1005     info->nz_allocated = irecv[1];
1006     info->nz_unneeded  = irecv[2];
1007     info->memory       = irecv[3];
1008     info->mallocs      = irecv[4];
1009   } else if (flag == MAT_GLOBAL_SUM) {
1010     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1011     info->nz_used      = irecv[0];
1012     info->nz_allocated = irecv[1];
1013     info->nz_unneeded  = irecv[2];
1014     info->memory       = irecv[3];
1015     info->mallocs      = irecv[4];
1016   }
1017   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1018   info->fill_ratio_needed = 0;
1019   info->factor_mallocs    = 0;
1020   info->rows_global       = (double)matin->M;
1021   info->columns_global    = (double)matin->N;
1022   info->rows_local        = (double)matin->m;
1023   info->columns_local     = (double)matin->N;
1024 
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatSetOption_MPIAIJ"
1030 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1033   int        ierr;
1034 
1035   PetscFunctionBegin;
1036   switch (op) {
1037   case MAT_NO_NEW_NONZERO_LOCATIONS:
1038   case MAT_YES_NEW_NONZERO_LOCATIONS:
1039   case MAT_COLUMNS_UNSORTED:
1040   case MAT_COLUMNS_SORTED:
1041   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1042   case MAT_KEEP_ZEROED_ROWS:
1043   case MAT_NEW_NONZERO_LOCATION_ERR:
1044   case MAT_USE_INODES:
1045   case MAT_DO_NOT_USE_INODES:
1046   case MAT_IGNORE_ZERO_ENTRIES:
1047     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1048     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1049     break;
1050   case MAT_ROW_ORIENTED:
1051     a->roworiented = PETSC_TRUE;
1052     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1053     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1054     break;
1055   case MAT_ROWS_SORTED:
1056   case MAT_ROWS_UNSORTED:
1057   case MAT_YES_NEW_DIAGONALS:
1058   case MAT_USE_SINGLE_PRECISION_SOLVES:
1059     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1060     break;
1061   case MAT_COLUMN_ORIENTED:
1062     a->roworiented = PETSC_FALSE;
1063     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1064     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1065     break;
1066   case MAT_IGNORE_OFF_PROC_ENTRIES:
1067     a->donotstash = PETSC_TRUE;
1068     break;
1069   case MAT_NO_NEW_DIAGONALS:
1070     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1071     break;
1072   default:
1073     SETERRQ(PETSC_ERR_SUP,"unknown option");
1074     break;
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetRow_MPIAIJ"
1081 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1082 {
1083   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1084   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1085   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1086   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1087   int          *cmap,*idx_p;
1088 
1089   PetscFunctionBegin;
1090   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1091   mat->getrowactive = PETSC_TRUE;
1092 
1093   if (!mat->rowvalues && (idx || v)) {
1094     /*
1095         allocate enough space to hold information from the longest row.
1096     */
1097     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1098     int     max = 1,tmp;
1099     for (i=0; i<matin->m; i++) {
1100       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1101       if (max < tmp) { max = tmp; }
1102     }
1103     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1104     mat->rowindices = (int*)(mat->rowvalues + max);
1105   }
1106 
1107   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1108   lrow = row - rstart;
1109 
1110   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1111   if (!v)   {pvA = 0; pvB = 0;}
1112   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1113   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1114   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1115   nztot = nzA + nzB;
1116 
1117   cmap  = mat->garray;
1118   if (v  || idx) {
1119     if (nztot) {
1120       /* Sort by increasing column numbers, assuming A and B already sorted */
1121       int imark = -1;
1122       if (v) {
1123         *v = v_p = mat->rowvalues;
1124         for (i=0; i<nzB; i++) {
1125           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1126           else break;
1127         }
1128         imark = i;
1129         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1130         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1131       }
1132       if (idx) {
1133         *idx = idx_p = mat->rowindices;
1134         if (imark > -1) {
1135           for (i=0; i<imark; i++) {
1136             idx_p[i] = cmap[cworkB[i]];
1137           }
1138         } else {
1139           for (i=0; i<nzB; i++) {
1140             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1141             else break;
1142           }
1143           imark = i;
1144         }
1145         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1146         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1147       }
1148     } else {
1149       if (idx) *idx = 0;
1150       if (v)   *v   = 0;
1151     }
1152   }
1153   *nz = nztot;
1154   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1155   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1161 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1162 {
1163   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1164 
1165   PetscFunctionBegin;
1166   if (aij->getrowactive == PETSC_FALSE) {
1167     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1168   }
1169   aij->getrowactive = PETSC_FALSE;
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatNorm_MPIAIJ"
1175 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1176 {
1177   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1178   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1179   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1180   PetscReal    sum = 0.0;
1181   PetscScalar  *v;
1182 
1183   PetscFunctionBegin;
1184   if (aij->size == 1) {
1185     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1186   } else {
1187     if (type == NORM_FROBENIUS) {
1188       v = amat->a;
1189       for (i=0; i<amat->nz; i++) {
1190 #if defined(PETSC_USE_COMPLEX)
1191         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1192 #else
1193         sum += (*v)*(*v); v++;
1194 #endif
1195       }
1196       v = bmat->a;
1197       for (i=0; i<bmat->nz; i++) {
1198 #if defined(PETSC_USE_COMPLEX)
1199         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1200 #else
1201         sum += (*v)*(*v); v++;
1202 #endif
1203       }
1204       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1205       *norm = sqrt(*norm);
1206     } else if (type == NORM_1) { /* max column norm */
1207       PetscReal *tmp,*tmp2;
1208       int    *jj,*garray = aij->garray;
1209       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1210       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1211       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1212       *norm = 0.0;
1213       v = amat->a; jj = amat->j;
1214       for (j=0; j<amat->nz; j++) {
1215         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1216       }
1217       v = bmat->a; jj = bmat->j;
1218       for (j=0; j<bmat->nz; j++) {
1219         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1220       }
1221       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1222       for (j=0; j<mat->N; j++) {
1223         if (tmp2[j] > *norm) *norm = tmp2[j];
1224       }
1225       ierr = PetscFree(tmp);CHKERRQ(ierr);
1226       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1227     } else if (type == NORM_INFINITY) { /* max row norm */
1228       PetscReal ntemp = 0.0;
1229       for (j=0; j<aij->A->m; j++) {
1230         v = amat->a + amat->i[j] + shift;
1231         sum = 0.0;
1232         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1233           sum += PetscAbsScalar(*v); v++;
1234         }
1235         v = bmat->a + bmat->i[j] + shift;
1236         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1237           sum += PetscAbsScalar(*v); v++;
1238         }
1239         if (sum > ntemp) ntemp = sum;
1240       }
1241       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1242     } else {
1243       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1244     }
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatTranspose_MPIAIJ"
1251 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1252 {
1253   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1254   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1255   int          ierr,shift = Aloc->indexshift;
1256   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1257   Mat          B;
1258   PetscScalar  *array;
1259 
1260   PetscFunctionBegin;
1261   if (!matout && M != N) {
1262     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1263   }
1264 
1265   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1266 
1267   /* copy over the A part */
1268   Aloc = (Mat_SeqAIJ*)a->A->data;
1269   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1270   row = a->rstart;
1271   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1272   for (i=0; i<m; i++) {
1273     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1274     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1275   }
1276   aj = Aloc->j;
1277   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1278 
1279   /* copy over the B part */
1280   Aloc = (Mat_SeqAIJ*)a->B->data;
1281   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1282   row  = a->rstart;
1283   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1284   ct   = cols;
1285   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1286   for (i=0; i<m; i++) {
1287     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1288     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1289   }
1290   ierr = PetscFree(ct);CHKERRQ(ierr);
1291   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1292   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1293   if (matout) {
1294     *matout = B;
1295   } else {
1296     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1303 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1304 {
1305   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1306   Mat        a = aij->A,b = aij->B;
1307   int        ierr,s1,s2,s3;
1308 
1309   PetscFunctionBegin;
1310   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1311   if (rr) {
1312     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1313     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1314     /* Overlap communication with computation. */
1315     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1316   }
1317   if (ll) {
1318     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1319     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1320     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1321   }
1322   /* scale  the diagonal block */
1323   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1324 
1325   if (rr) {
1326     /* Do a scatter end and then right scale the off-diagonal block */
1327     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1328     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1329   }
1330 
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1337 int MatPrintHelp_MPIAIJ(Mat A)
1338 {
1339   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1340   int        ierr;
1341 
1342   PetscFunctionBegin;
1343   if (!a->rank) {
1344     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1351 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1352 {
1353   PetscFunctionBegin;
1354   *bs = 1;
1355   PetscFunctionReturn(0);
1356 }
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1359 int MatSetUnfactored_MPIAIJ(Mat A)
1360 {
1361   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1362   int        ierr;
1363 
1364   PetscFunctionBegin;
1365   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatEqual_MPIAIJ"
1371 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1372 {
1373   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1374   Mat        a,b,c,d;
1375   PetscTruth flg;
1376   int        ierr;
1377 
1378   PetscFunctionBegin;
1379   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1380   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1381   a = matA->A; b = matA->B;
1382   c = matB->A; d = matB->B;
1383 
1384   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1385   if (flg == PETSC_TRUE) {
1386     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1387   }
1388   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatCopy_MPIAIJ"
1394 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1395 {
1396   int        ierr;
1397   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1398   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1399   PetscTruth flg;
1400 
1401   PetscFunctionBegin;
1402   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1403   if (str != SAME_NONZERO_PATTERN || !flg) {
1404     /* because of the column compression in the off-processor part of the matrix a->B,
1405        the number of columns in a->B and b->B may be different, hence we cannot call
1406        the MatCopy() directly on the two parts. If need be, we can provide a more
1407        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1408        then copying the submatrices */
1409     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1410   } else {
1411     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1412     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1419 int MatSetUpPreallocation_MPIAIJ(Mat A)
1420 {
1421   int        ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1429 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1430 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1431 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1432 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1433 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1434 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1435 #endif
1436 
1437 /* -------------------------------------------------------------------*/
1438 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1439        MatGetRow_MPIAIJ,
1440        MatRestoreRow_MPIAIJ,
1441        MatMult_MPIAIJ,
1442        MatMultAdd_MPIAIJ,
1443        MatMultTranspose_MPIAIJ,
1444        MatMultTransposeAdd_MPIAIJ,
1445        0,
1446        0,
1447        0,
1448        0,
1449        0,
1450        0,
1451        MatRelax_MPIAIJ,
1452        MatTranspose_MPIAIJ,
1453        MatGetInfo_MPIAIJ,
1454        MatEqual_MPIAIJ,
1455        MatGetDiagonal_MPIAIJ,
1456        MatDiagonalScale_MPIAIJ,
1457        MatNorm_MPIAIJ,
1458        MatAssemblyBegin_MPIAIJ,
1459        MatAssemblyEnd_MPIAIJ,
1460        0,
1461        MatSetOption_MPIAIJ,
1462        MatZeroEntries_MPIAIJ,
1463        MatZeroRows_MPIAIJ,
1464 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1465        MatLUFactorSymbolic_MPIAIJ_TFS,
1466 #else
1467        0,
1468 #endif
1469        0,
1470        0,
1471        0,
1472        MatSetUpPreallocation_MPIAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477        MatDuplicate_MPIAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482        0,
1483        MatGetSubMatrices_MPIAIJ,
1484        MatIncreaseOverlap_MPIAIJ,
1485        MatGetValues_MPIAIJ,
1486        MatCopy_MPIAIJ,
1487        MatPrintHelp_MPIAIJ,
1488        MatScale_MPIAIJ,
1489        0,
1490        0,
1491        0,
1492        MatGetBlockSize_MPIAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497        MatFDColoringCreate_MPIAIJ,
1498        0,
1499        MatSetUnfactored_MPIAIJ,
1500        0,
1501        0,
1502        MatGetSubMatrix_MPIAIJ,
1503        MatDestroy_MPIAIJ,
1504        MatView_MPIAIJ,
1505        MatGetPetscMaps_Petsc,
1506        0,
1507        0,
1508        0,
1509        0,
1510        0,
1511        0,
1512        0,
1513        0,
1514        MatSetColoring_MPIAIJ,
1515        MatSetValuesAdic_MPIAIJ,
1516        MatSetValuesAdifor_MPIAIJ
1517 };
1518 
1519 /* ----------------------------------------------------------------------------------------*/
1520 
1521 EXTERN_C_BEGIN
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1524 int MatStoreValues_MPIAIJ(Mat mat)
1525 {
1526   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1527   int        ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1531   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 EXTERN_C_END
1535 
1536 EXTERN_C_BEGIN
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1539 int MatRetrieveValues_MPIAIJ(Mat mat)
1540 {
1541   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1542   int        ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1546   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 EXTERN_C_END
1550 
1551 #include "petscpc.h"
1552 EXTERN_C_BEGIN
1553 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1554 EXTERN_C_END
1555 
1556 EXTERN_C_BEGIN
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatCreate_MPIAIJ"
1559 int MatCreate_MPIAIJ(Mat B)
1560 {
1561   Mat_MPIAIJ   *b;
1562   int          ierr,i,size;
1563 
1564   PetscFunctionBegin;
1565   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1566 
1567   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1568   B->data         = (void*)b;
1569   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1570   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1571   B->factor       = 0;
1572   B->assembled    = PETSC_FALSE;
1573   B->mapping      = 0;
1574 
1575   B->insertmode      = NOT_SET_VALUES;
1576   b->size            = size;
1577   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1578 
1579   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1580   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1581 
1582   /* the information in the maps duplicates the information computed below, eventually
1583      we should remove the duplicate information that is not contained in the maps */
1584   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1585   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1586 
1587   /* build local table of row and column ownerships */
1588   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1589   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1590   b->cowners = b->rowners + b->size + 2;
1591   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1592   b->rowners[0] = 0;
1593   for (i=2; i<=b->size; i++) {
1594     b->rowners[i] += b->rowners[i-1];
1595   }
1596   b->rstart = b->rowners[b->rank];
1597   b->rend   = b->rowners[b->rank+1];
1598   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1599   b->cowners[0] = 0;
1600   for (i=2; i<=b->size; i++) {
1601     b->cowners[i] += b->cowners[i-1];
1602   }
1603   b->cstart = b->cowners[b->rank];
1604   b->cend   = b->cowners[b->rank+1];
1605 
1606   /* build cache for off array entries formed */
1607   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1608   b->donotstash  = PETSC_FALSE;
1609   b->colmap      = 0;
1610   b->garray      = 0;
1611   b->roworiented = PETSC_TRUE;
1612 
1613   /* stuff used for matrix vector multiply */
1614   b->lvec      = PETSC_NULL;
1615   b->Mvctx     = PETSC_NULL;
1616 
1617   /* stuff for MatGetRow() */
1618   b->rowindices   = 0;
1619   b->rowvalues    = 0;
1620   b->getrowactive = PETSC_FALSE;
1621 
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1623                                      "MatStoreValues_MPIAIJ",
1624                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1626                                      "MatRetrieveValues_MPIAIJ",
1627                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1629                                      "MatGetDiagonalBlock_MPIAIJ",
1630                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 EXTERN_C_END
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1637 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1638 {
1639   Mat        mat;
1640   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1641   int        ierr;
1642 
1643   PetscFunctionBegin;
1644   *newmat       = 0;
1645   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1646   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1647   a    = (Mat_MPIAIJ*)mat->data;
1648   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1649   mat->factor       = matin->factor;
1650   mat->assembled    = PETSC_TRUE;
1651   mat->insertmode   = NOT_SET_VALUES;
1652   mat->preallocated = PETSC_TRUE;
1653 
1654   a->rstart       = oldmat->rstart;
1655   a->rend         = oldmat->rend;
1656   a->cstart       = oldmat->cstart;
1657   a->cend         = oldmat->cend;
1658   a->size         = oldmat->size;
1659   a->rank         = oldmat->rank;
1660   a->donotstash   = oldmat->donotstash;
1661   a->roworiented  = oldmat->roworiented;
1662   a->rowindices   = 0;
1663   a->rowvalues    = 0;
1664   a->getrowactive = PETSC_FALSE;
1665 
1666   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1667   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1668   if (oldmat->colmap) {
1669 #if defined (PETSC_USE_CTABLE)
1670     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1671 #else
1672     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1673     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1674     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1675 #endif
1676   } else a->colmap = 0;
1677   if (oldmat->garray) {
1678     int len;
1679     len  = oldmat->B->n;
1680     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1681     PetscLogObjectMemory(mat,len*sizeof(int));
1682     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1683   } else a->garray = 0;
1684 
1685   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1686   PetscLogObjectParent(mat,a->lvec);
1687   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1688   PetscLogObjectParent(mat,a->Mvctx);
1689   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1690   PetscLogObjectParent(mat,a->A);
1691   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1692   PetscLogObjectParent(mat,a->B);
1693   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1694   *newmat = mat;
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #include "petscsys.h"
1699 
1700 EXTERN_C_BEGIN
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatLoad_MPIAIJ"
1703 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1704 {
1705   Mat          A;
1706   PetscScalar  *vals,*svals;
1707   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1708   MPI_Status   status;
1709   int          i,nz,ierr,j,rstart,rend,fd;
1710   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1711   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1712   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1713 
1714   PetscFunctionBegin;
1715   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1716   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1717   if (!rank) {
1718     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1719     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1720     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1721     if (header[3] < 0) {
1722       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1723     }
1724   }
1725 
1726   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1727   M = header[1]; N = header[2];
1728   /* determine ownership of all rows */
1729   m = M/size + ((M % size) > rank);
1730   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1731   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1732   rowners[0] = 0;
1733   for (i=2; i<=size; i++) {
1734     rowners[i] += rowners[i-1];
1735   }
1736   rstart = rowners[rank];
1737   rend   = rowners[rank+1];
1738 
1739   /* distribute row lengths to all processors */
1740   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1741   offlens = ourlens + (rend-rstart);
1742   if (!rank) {
1743     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1744     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1745     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1746     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1747     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1748     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1749   } else {
1750     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1751   }
1752 
1753   if (!rank) {
1754     /* calculate the number of nonzeros on each processor */
1755     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1756     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1757     for (i=0; i<size; i++) {
1758       for (j=rowners[i]; j< rowners[i+1]; j++) {
1759         procsnz[i] += rowlengths[j];
1760       }
1761     }
1762     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1763 
1764     /* determine max buffer needed and allocate it */
1765     maxnz = 0;
1766     for (i=0; i<size; i++) {
1767       maxnz = PetscMax(maxnz,procsnz[i]);
1768     }
1769     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1770 
1771     /* read in my part of the matrix column indices  */
1772     nz   = procsnz[0];
1773     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1774     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1775 
1776     /* read in every one elses and ship off */
1777     for (i=1; i<size; i++) {
1778       nz   = procsnz[i];
1779       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1780       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1781     }
1782     ierr = PetscFree(cols);CHKERRQ(ierr);
1783   } else {
1784     /* determine buffer space needed for message */
1785     nz = 0;
1786     for (i=0; i<m; i++) {
1787       nz += ourlens[i];
1788     }
1789     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1790 
1791     /* receive message of column indices*/
1792     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1793     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1794     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1795   }
1796 
1797   /* determine column ownership if matrix is not square */
1798   if (N != M) {
1799     n      = N/size + ((N % size) > rank);
1800     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1801     cstart = cend - n;
1802   } else {
1803     cstart = rstart;
1804     cend   = rend;
1805     n      = cend - cstart;
1806   }
1807 
1808   /* loop over local rows, determining number of off diagonal entries */
1809   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1810   jj = 0;
1811   for (i=0; i<m; i++) {
1812     for (j=0; j<ourlens[i]; j++) {
1813       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1814       jj++;
1815     }
1816   }
1817 
1818   /* create our matrix */
1819   for (i=0; i<m; i++) {
1820     ourlens[i] -= offlens[i];
1821   }
1822   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1823   A = *newmat;
1824   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1825   for (i=0; i<m; i++) {
1826     ourlens[i] += offlens[i];
1827   }
1828 
1829   if (!rank) {
1830     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1831 
1832     /* read in my part of the matrix numerical values  */
1833     nz   = procsnz[0];
1834     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1835 
1836     /* insert into matrix */
1837     jj      = rstart;
1838     smycols = mycols;
1839     svals   = vals;
1840     for (i=0; i<m; i++) {
1841       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1842       smycols += ourlens[i];
1843       svals   += ourlens[i];
1844       jj++;
1845     }
1846 
1847     /* read in other processors and ship out */
1848     for (i=1; i<size; i++) {
1849       nz   = procsnz[i];
1850       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1851       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1852     }
1853     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1854   } else {
1855     /* receive numeric values */
1856     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1857 
1858     /* receive message of values*/
1859     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1860     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1861     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1862 
1863     /* insert into matrix */
1864     jj      = rstart;
1865     smycols = mycols;
1866     svals   = vals;
1867     for (i=0; i<m; i++) {
1868       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1869       smycols += ourlens[i];
1870       svals   += ourlens[i];
1871       jj++;
1872     }
1873   }
1874   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1875   ierr = PetscFree(vals);CHKERRQ(ierr);
1876   ierr = PetscFree(mycols);CHKERRQ(ierr);
1877   ierr = PetscFree(rowners);CHKERRQ(ierr);
1878 
1879   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 EXTERN_C_END
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1887 /*
1888     Not great since it makes two copies of the submatrix, first an SeqAIJ
1889   in local and then by concatenating the local matrices the end result.
1890   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1891 */
1892 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1893 {
1894   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1895   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1896   Mat          *local,M,Mreuse;
1897   PetscScalar  *vwork,*aa;
1898   MPI_Comm     comm = mat->comm;
1899   Mat_SeqAIJ   *aij;
1900 
1901 
1902   PetscFunctionBegin;
1903   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1904   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1905 
1906   if (call ==  MAT_REUSE_MATRIX) {
1907     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1908     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1909     local = &Mreuse;
1910     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1911   } else {
1912     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1913     Mreuse = *local;
1914     ierr   = PetscFree(local);CHKERRQ(ierr);
1915   }
1916 
1917   /*
1918       m - number of local rows
1919       n - number of columns (same on all processors)
1920       rstart - first row in new global matrix generated
1921   */
1922   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1923   if (call == MAT_INITIAL_MATRIX) {
1924     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1925     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1926     ii  = aij->i;
1927     jj  = aij->j;
1928 
1929     /*
1930         Determine the number of non-zeros in the diagonal and off-diagonal
1931         portions of the matrix in order to do correct preallocation
1932     */
1933 
1934     /* first get start and end of "diagonal" columns */
1935     if (csize == PETSC_DECIDE) {
1936       nlocal = n/size + ((n % size) > rank);
1937     } else {
1938       nlocal = csize;
1939     }
1940     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1941     rstart = rend - nlocal;
1942     if (rank == size - 1 && rend != n) {
1943       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1944     }
1945 
1946     /* next, compute all the lengths */
1947     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1948     olens = dlens + m;
1949     for (i=0; i<m; i++) {
1950       jend = ii[i+1] - ii[i];
1951       olen = 0;
1952       dlen = 0;
1953       for (j=0; j<jend; j++) {
1954         if (*jj < rstart || *jj >= rend) olen++;
1955         else dlen++;
1956         jj++;
1957       }
1958       olens[i] = olen;
1959       dlens[i] = dlen;
1960     }
1961     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1962     ierr = PetscFree(dlens);CHKERRQ(ierr);
1963   } else {
1964     int ml,nl;
1965 
1966     M = *newmat;
1967     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1968     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1969     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1970     /*
1971          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1972        rather than the slower MatSetValues().
1973     */
1974     M->was_assembled = PETSC_TRUE;
1975     M->assembled     = PETSC_FALSE;
1976   }
1977   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1978   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1979   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1980   ii  = aij->i;
1981   jj  = aij->j;
1982   aa  = aij->a;
1983   for (i=0; i<m; i++) {
1984     row   = rstart + i;
1985     nz    = ii[i+1] - ii[i];
1986     cwork = jj;     jj += nz;
1987     vwork = aa;     aa += nz;
1988     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1989   }
1990 
1991   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1992   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   *newmat = M;
1994 
1995   /* save submatrix used in processor for next request */
1996   if (call ==  MAT_INITIAL_MATRIX) {
1997     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1998     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1999   }
2000 
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2006 /*@C
2007    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2008    (the default parallel PETSc format).  For good matrix assembly performance
2009    the user should preallocate the matrix storage by setting the parameters
2010    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2011    performance can be increased by more than a factor of 50.
2012 
2013    Collective on MPI_Comm
2014 
2015    Input Parameters:
2016 +  A - the matrix
2017 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2018            (same value is used for all local rows)
2019 .  d_nnz - array containing the number of nonzeros in the various rows of the
2020            DIAGONAL portion of the local submatrix (possibly different for each row)
2021            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2022            The size of this array is equal to the number of local rows, i.e 'm'.
2023            You must leave room for the diagonal entry even if it is zero.
2024 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2025            submatrix (same value is used for all local rows).
2026 -  o_nnz - array containing the number of nonzeros in the various rows of the
2027            OFF-DIAGONAL portion of the local submatrix (possibly different for
2028            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2029            structure. The size of this array is equal to the number
2030            of local rows, i.e 'm'.
2031 
2032    The AIJ format (also called the Yale sparse matrix format or
2033    compressed row storage), is fully compatible with standard Fortran 77
2034    storage.  That is, the stored row and column indices can begin at
2035    either one (as in Fortran) or zero.  See the users manual for details.
2036 
2037    The user MUST specify either the local or global matrix dimensions
2038    (possibly both).
2039 
2040    The parallel matrix is partitioned such that the first m0 rows belong to
2041    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2042    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2043 
2044    The DIAGONAL portion of the local submatrix of a processor can be defined
2045    as the submatrix which is obtained by extraction the part corresponding
2046    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2047    first row that belongs to the processor, and r2 is the last row belonging
2048    to the this processor. This is a square mxm matrix. The remaining portion
2049    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2050 
2051    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2052 
2053    By default, this format uses inodes (identical nodes) when possible.
2054    We search for consecutive rows with the same nonzero structure, thereby
2055    reusing matrix information to achieve increased efficiency.
2056 
2057    Options Database Keys:
2058 +  -mat_aij_no_inode  - Do not use inodes
2059 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2060 -  -mat_aij_oneindex - Internally use indexing starting at 1
2061         rather than 0.  Note that when calling MatSetValues(),
2062         the user still MUST index entries starting at 0!
2063 
2064    Example usage:
2065 
2066    Consider the following 8x8 matrix with 34 non-zero values, that is
2067    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2069    as follows:
2070 
2071 .vb
2072             1  2  0  |  0  3  0  |  0  4
2073     Proc0   0  5  6  |  7  0  0  |  8  0
2074             9  0 10  | 11  0  0  | 12  0
2075     -------------------------------------
2076            13  0 14  | 15 16 17  |  0  0
2077     Proc1   0 18  0  | 19 20 21  |  0  0
2078             0  0  0  | 22 23  0  | 24  0
2079     -------------------------------------
2080     Proc2  25 26 27  |  0  0 28  | 29  0
2081            30  0  0  | 31 32 33  |  0 34
2082 .ve
2083 
2084    This can be represented as a collection of submatrices as:
2085 
2086 .vb
2087       A B C
2088       D E F
2089       G H I
2090 .ve
2091 
2092    Where the submatrices A,B,C are owned by proc0, D,E,F are
2093    owned by proc1, G,H,I are owned by proc2.
2094 
2095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2097    The 'M','N' parameters are 8,8, and have the same values on all procs.
2098 
2099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2103    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2104    matrix, ans [DF] as another SeqAIJ matrix.
2105 
2106    When d_nz, o_nz parameters are specified, d_nz storage elements are
2107    allocated for every row of the local diagonal submatrix, and o_nz
2108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2109    One way to choose d_nz and o_nz is to use the max nonzerors per local
2110    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2111    In this case, the values of d_nz,o_nz are:
2112 .vb
2113      proc0 : dnz = 2, o_nz = 2
2114      proc1 : dnz = 3, o_nz = 2
2115      proc2 : dnz = 1, o_nz = 4
2116 .ve
2117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2119    for proc3. i.e we are using 12+15+10=37 storage locations to store
2120    34 values.
2121 
2122    When d_nnz, o_nnz parameters are specified, the storage is specified
2123    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2124    In the above case the values for d_nnz,o_nnz are:
2125 .vb
2126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2129 .ve
2130    Here the space allocated is sum of all the above values i.e 34, and
2131    hence pre-allocation is perfect.
2132 
2133    Level: intermediate
2134 
2135 .keywords: matrix, aij, compressed row, sparse, parallel
2136 
2137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2138 @*/
2139 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2140 {
2141   Mat_MPIAIJ   *b;
2142   int          ierr,i;
2143   PetscTruth   flg2;
2144 
2145   PetscFunctionBegin;
2146   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2147   if (!flg2) PetscFunctionReturn(0);
2148   B->preallocated = PETSC_TRUE;
2149   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2150   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2151   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2152   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2153   if (d_nnz) {
2154     for (i=0; i<B->m; i++) {
2155       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2156     }
2157   }
2158   if (o_nnz) {
2159     for (i=0; i<B->m; i++) {
2160       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2161     }
2162   }
2163   b = (Mat_MPIAIJ*)B->data;
2164 
2165   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2166   PetscLogObjectParent(B,b->A);
2167   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2168   PetscLogObjectParent(B,b->B);
2169 
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatCreateMPIAIJ"
2175 /*@C
2176    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2177    (the default parallel PETSc format).  For good matrix assembly performance
2178    the user should preallocate the matrix storage by setting the parameters
2179    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2180    performance can be increased by more than a factor of 50.
2181 
2182    Collective on MPI_Comm
2183 
2184    Input Parameters:
2185 +  comm - MPI communicator
2186 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2187            This value should be the same as the local size used in creating the
2188            y vector for the matrix-vector product y = Ax.
2189 .  n - This value should be the same as the local size used in creating the
2190        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2191        calculated if N is given) For square matrices n is almost always m.
2192 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2193 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2194 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2195            (same value is used for all local rows)
2196 .  d_nnz - array containing the number of nonzeros in the various rows of the
2197            DIAGONAL portion of the local submatrix (possibly different for each row)
2198            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2199            The size of this array is equal to the number of local rows, i.e 'm'.
2200            You must leave room for the diagonal entry even if it is zero.
2201 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2202            submatrix (same value is used for all local rows).
2203 -  o_nnz - array containing the number of nonzeros in the various rows of the
2204            OFF-DIAGONAL portion of the local submatrix (possibly different for
2205            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2206            structure. The size of this array is equal to the number
2207            of local rows, i.e 'm'.
2208 
2209    Output Parameter:
2210 .  A - the matrix
2211 
2212    Notes:
2213    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2214    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2215    storage requirements for this matrix.
2216 
2217    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2218    processor than it must be used on all processors that share the object for
2219    that argument.
2220 
2221    The AIJ format (also called the Yale sparse matrix format or
2222    compressed row storage), is fully compatible with standard Fortran 77
2223    storage.  That is, the stored row and column indices can begin at
2224    either one (as in Fortran) or zero.  See the users manual for details.
2225 
2226    The user MUST specify either the local or global matrix dimensions
2227    (possibly both).
2228 
2229    The parallel matrix is partitioned such that the first m0 rows belong to
2230    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2231    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2232 
2233    The DIAGONAL portion of the local submatrix of a processor can be defined
2234    as the submatrix which is obtained by extraction the part corresponding
2235    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2236    first row that belongs to the processor, and r2 is the last row belonging
2237    to the this processor. This is a square mxm matrix. The remaining portion
2238    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2239 
2240    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2241 
2242    By default, this format uses inodes (identical nodes) when possible.
2243    We search for consecutive rows with the same nonzero structure, thereby
2244    reusing matrix information to achieve increased efficiency.
2245 
2246    Options Database Keys:
2247 +  -mat_aij_no_inode  - Do not use inodes
2248 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2249 -  -mat_aij_oneindex - Internally use indexing starting at 1
2250         rather than 0.  Note that when calling MatSetValues(),
2251         the user still MUST index entries starting at 0!
2252 
2253 
2254    Example usage:
2255 
2256    Consider the following 8x8 matrix with 34 non-zero values, that is
2257    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2258    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2259    as follows:
2260 
2261 .vb
2262             1  2  0  |  0  3  0  |  0  4
2263     Proc0   0  5  6  |  7  0  0  |  8  0
2264             9  0 10  | 11  0  0  | 12  0
2265     -------------------------------------
2266            13  0 14  | 15 16 17  |  0  0
2267     Proc1   0 18  0  | 19 20 21  |  0  0
2268             0  0  0  | 22 23  0  | 24  0
2269     -------------------------------------
2270     Proc2  25 26 27  |  0  0 28  | 29  0
2271            30  0  0  | 31 32 33  |  0 34
2272 .ve
2273 
2274    This can be represented as a collection of submatrices as:
2275 
2276 .vb
2277       A B C
2278       D E F
2279       G H I
2280 .ve
2281 
2282    Where the submatrices A,B,C are owned by proc0, D,E,F are
2283    owned by proc1, G,H,I are owned by proc2.
2284 
2285    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2286    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2287    The 'M','N' parameters are 8,8, and have the same values on all procs.
2288 
2289    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2290    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2291    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2292    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2293    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2294    matrix, ans [DF] as another SeqAIJ matrix.
2295 
2296    When d_nz, o_nz parameters are specified, d_nz storage elements are
2297    allocated for every row of the local diagonal submatrix, and o_nz
2298    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2299    One way to choose d_nz and o_nz is to use the max nonzerors per local
2300    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2301    In this case, the values of d_nz,o_nz are:
2302 .vb
2303      proc0 : dnz = 2, o_nz = 2
2304      proc1 : dnz = 3, o_nz = 2
2305      proc2 : dnz = 1, o_nz = 4
2306 .ve
2307    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2308    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2309    for proc3. i.e we are using 12+15+10=37 storage locations to store
2310    34 values.
2311 
2312    When d_nnz, o_nnz parameters are specified, the storage is specified
2313    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2314    In the above case the values for d_nnz,o_nnz are:
2315 .vb
2316      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2317      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2318      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2319 .ve
2320    Here the space allocated is sum of all the above values i.e 34, and
2321    hence pre-allocation is perfect.
2322 
2323    Level: intermediate
2324 
2325 .keywords: matrix, aij, compressed row, sparse, parallel
2326 
2327 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2328 @*/
2329 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2330 {
2331   int ierr,size;
2332 
2333   PetscFunctionBegin;
2334   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2335   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2336   if (size > 1) {
2337     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2339   } else {
2340     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2341     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2342   }
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2348 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2349 {
2350   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2351   PetscFunctionBegin;
2352   *Ad     = a->A;
2353   *Ao     = a->B;
2354   *colmap = a->garray;
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2360 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2361 {
2362   int        ierr;
2363   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2364 
2365   PetscFunctionBegin;
2366   if (coloring->ctype == IS_COLORING_LOCAL) {
2367     int        *allcolors,*colors,i;
2368     ISColoring ocoloring;
2369 
2370     /* set coloring for diagonal portion */
2371     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2372 
2373     /* set coloring for off-diagonal portion */
2374     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2375     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2376     for (i=0; i<a->B->n; i++) {
2377       colors[i] = allcolors[a->garray[i]];
2378     }
2379     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2380     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2381     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2382     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2383   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2384     int        *colors,i,*larray;
2385     ISColoring ocoloring;
2386 
2387     /* set coloring for diagonal portion */
2388     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2389     for (i=0; i<a->A->n; i++) {
2390       larray[i] = i + a->cstart;
2391     }
2392     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2393     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2394     for (i=0; i<a->A->n; i++) {
2395       colors[i] = coloring->colors[larray[i]];
2396     }
2397     ierr = PetscFree(larray);CHKERRQ(ierr);
2398     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2399     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2400     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2401 
2402     /* set coloring for off-diagonal portion */
2403     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2404     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2405     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2406     for (i=0; i<a->B->n; i++) {
2407       colors[i] = coloring->colors[larray[i]];
2408     }
2409     ierr = PetscFree(larray);CHKERRQ(ierr);
2410     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2411     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2412     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2413   } else {
2414     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2415   }
2416 
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2422 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2423 {
2424   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2425   int        ierr;
2426 
2427   PetscFunctionBegin;
2428   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2429   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2435 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2436 {
2437   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2438   int        ierr;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2442   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445