1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
19 a slightly higher hash table cost; without it, it is not scalable (each processor
20 has an order-N integer array) but it is fast to access.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
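
/*
   Illustrative sketch (not part of the library source): suppose this process's
   off-diagonal block B has garray[] = {3,17,42}, i.e. its three local columns
   correspond to global columns 3, 17 and 42.  After CreateColmap_MPIAIJ_Private()
   a global column gcol is translated to a local column by (non-CTABLE branch)

       lcol = aij->colmap[gcol] - 1;      so colmap[17]-1 == 1, colmap[5]-1 == -1

   The stored values are shifted by one so that 0 can mean "this global column does
   not appear in the off-diagonal part"; the PETSC_USE_CTABLE branch applies the
   same +1/-1 convention through PetscTableAdd()/PetscTableFind().
*/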
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some variables required by the macros */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244             col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
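
/*
   Minimal usage sketch (an illustration, not code from this file; "mat" is an
   MPIAIJ matrix and "grow" a hypothetical global row index): entries for locally
   owned rows go directly into aij->A or aij->B above, while everything else is
   stashed and exchanged during assembly.

       PetscScalar one = 1.0;
       ierr = MatSetValues(mat,1,&grow,1,&grow,&one,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/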
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col--;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled, if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the data into the send scatter */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal we use separate
540      code so that MatSetValues() is not called for each diagonal entry, which would
541      allocate new memory, causing many mallocs and slowing things down.
542 
543        Contributed by: Matthew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
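
/*
   Usage sketch (hypothetical index set "is" of global row numbers; because of the
   communication above, any process may list rows owned by any other process):

       PetscScalar diag = 1.0;
       ierr = MatZeroRows(mat,is,&diag);CHKERRQ(ierr);        put 1.0 on each zeroed diagonal
       ierr = MatZeroRows(mat,is,PETSC_NULL);CHKERRQ(ierr);   leave the zeroed rows entirely zero
*/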
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
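
/*
   The multiply above uses the standard MPIAIJ splitting: with x_loc the locally
   owned entries of xx and x_ghost the off-process entries gathered into a->lvec,

       yy = a->A * x_loc + a->B * x_ghost

   so communication of the ghost values is overlapped with the local product a->A*x_loc.
*/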
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* inserted in yy until the next line, which is true for this implementation */
629   /* but may not always be true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
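
/*
   The transpose multiply splits the same way, but in reverse: each process forms
   a->A^T * x_loc locally and a->B^T * x_loc into a->lvec; the reverse scatter then
   accumulates (ADD_VALUES) those contributions onto their owning processes, giving
   yy = A^T * xx globally.
*/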
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually */
649   /* inserted in zz until the next line, which is true for this implementation */
650   /* but may not always be true. */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the subblock A->A is the
657    diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
722   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
723   PetscTruth        isdraw,isascii,flg;
724   PetscViewer       sviewer;
725   PetscViewerFormat format;
726 
727   PetscFunctionBegin;
728   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
730   if (isascii) {
731     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
732     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
733       MatInfo info;
734       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
735       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
737       if (flg) {
738         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
739 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
740       } else {
741         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
742 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
743       }
744       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
749       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
750       PetscFunctionReturn(0);
751     } else if (format == PETSC_VIEWER_ASCII_INFO) {
752       PetscFunctionReturn(0);
753     }
754   } else if (isdraw) {
755     PetscDraw       draw;
756     PetscTruth isnull;
757     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
758     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
759   }
760 
761   if (size == 1) {
762     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
763     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
764   } else {
765     /* assemble the entire matrix onto first processor. */
766     Mat         A;
767     Mat_SeqAIJ *Aloc;
768     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
769     PetscScalar *a;
770 
771     if (!rank) {
772       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     } else {
774       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
775     }
776     PetscLogObjectParent(mat,A);
777 
778     /* copy over the A part */
779     Aloc = (Mat_SeqAIJ*)aij->A->data;
780     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781     row = aij->rstart;
782     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
783     for (i=0; i<m; i++) {
784       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
785       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
786     }
787     aj = Aloc->j;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
789 
790     /* copy over the B part */
791     Aloc = (Mat_SeqAIJ*)aij->B->data;
792     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
793     row  = aij->rstart;
794     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
795     ct   = cols;
796     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
797     for (i=0; i<m; i++) {
798       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800     }
801     ierr = PetscFree(ct);CHKERRQ(ierr);
802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     /*
805        Everyone has to make the call to draw the matrix since the graphics waits are
806        synchronized across all processors that share the PetscDraw object
807     */
808     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
809     if (!rank) {
810       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
811       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
812     }
813     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     ierr = MatDestroy(A);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatView_MPIAIJ"
821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
822 {
823   int        ierr;
824   PetscTruth isascii,isdraw,issocket,isbinary;
825 
826   PetscFunctionBegin;
827   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
828   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
829   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
830   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
831   if (isascii || isdraw || isbinary || issocket) {
832     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
833   } else {
834     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatRelax_MPIAIJ"
843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
844 {
845   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
846   int          ierr;
847   Vec          bb1;
848   PetscScalar  mone=-1.0;
849 
850   PetscFunctionBegin;
851   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
852 
853   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
854 
855   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
856     if (flag & SOR_ZERO_INITIAL_GUESS) {
857       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
858       its--;
859     }
860 
861     while (its--) {
862       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
863       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
864 
865       /* update rhs: bb1 = bb - B*x */
866       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
867       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
868 
869       /* local sweep */
870       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
871       CHKERRQ(ierr);
872     }
873   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
876       its--;
877     }
878     while (its--) {
879       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881 
882       /* update rhs: bb1 = bb - B*x */
883       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
884       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
885 
886       /* local sweep */
887       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888       CHKERRQ(ierr);
889     }
890   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
893       its--;
894     }
895     while (its--) {
896       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898 
899       /* update rhs: bb1 = bb - B*x */
900       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
901       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
902 
903       /* local sweep */
904       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
905       CHKERRQ(ierr);
906     }
907   } else {
908     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
909   }
910 
911   ierr = VecDestroy(bb1);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
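
/*
   Each outer iteration above performs a local sweep x <- SOR(mat->A, bb1) with the
   modified right-hand side

       bb1 = bb - B * x_ghost

   i.e. the off-process coupling is frozen at the previous iterate: block Jacobi
   between processes, SOR within each process.
*/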
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatGetInfo_MPIAIJ"
917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
918 {
919   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
920   Mat        A = mat->A,B = mat->B;
921   int        ierr;
922   PetscReal  isend[5],irecv[5];
923 
924   PetscFunctionBegin;
925   info->block_size     = 1.0;
926   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
927   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
928   isend[3] = info->memory;  isend[4] = info->mallocs;
929   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
930   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
931   isend[3] += info->memory;  isend[4] += info->mallocs;
932   if (flag == MAT_LOCAL) {
933     info->nz_used      = isend[0];
934     info->nz_allocated = isend[1];
935     info->nz_unneeded  = isend[2];
936     info->memory       = isend[3];
937     info->mallocs      = isend[4];
938   } else if (flag == MAT_GLOBAL_MAX) {
939     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
940     info->nz_used      = irecv[0];
941     info->nz_allocated = irecv[1];
942     info->nz_unneeded  = irecv[2];
943     info->memory       = irecv[3];
944     info->mallocs      = irecv[4];
945   } else if (flag == MAT_GLOBAL_SUM) {
946     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
947     info->nz_used      = irecv[0];
948     info->nz_allocated = irecv[1];
949     info->nz_unneeded  = irecv[2];
950     info->memory       = irecv[3];
951     info->mallocs      = irecv[4];
952   }
953   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
954   info->fill_ratio_needed = 0;
955   info->factor_mallocs    = 0;
956   info->rows_global       = (double)matin->M;
957   info->columns_global    = (double)matin->N;
958   info->rows_local        = (double)matin->m;
959   info->columns_local     = (double)matin->N;
960 
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatSetOption_MPIAIJ"
966 int MatSetOption_MPIAIJ(Mat A,MatOption op)
967 {
968   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
969   int        ierr;
970 
971   PetscFunctionBegin;
972   switch (op) {
973   case MAT_NO_NEW_NONZERO_LOCATIONS:
974   case MAT_YES_NEW_NONZERO_LOCATIONS:
975   case MAT_COLUMNS_UNSORTED:
976   case MAT_COLUMNS_SORTED:
977   case MAT_NEW_NONZERO_ALLOCATION_ERR:
978   case MAT_KEEP_ZEROED_ROWS:
979   case MAT_NEW_NONZERO_LOCATION_ERR:
980   case MAT_USE_INODES:
981   case MAT_DO_NOT_USE_INODES:
982   case MAT_IGNORE_ZERO_ENTRIES:
983     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
984     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
985     break;
986   case MAT_ROW_ORIENTED:
987     a->roworiented = PETSC_TRUE;
988     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
989     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
990     break;
991   case MAT_ROWS_SORTED:
992   case MAT_ROWS_UNSORTED:
993   case MAT_YES_NEW_DIAGONALS:
994   case MAT_USE_SINGLE_PRECISION_SOLVES:
995     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
996     break;
997   case MAT_COLUMN_ORIENTED:
998     a->roworiented = PETSC_FALSE;
999     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1000     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1001     break;
1002   case MAT_IGNORE_OFF_PROC_ENTRIES:
1003     a->donotstash = PETSC_TRUE;
1004     break;
1005   case MAT_NO_NEW_DIAGONALS:
1006     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1007     break;
1008   default:
1009     SETERRQ(PETSC_ERR_SUP,"unknown option");
1010     break;
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPIAIJ"
1017 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1018 {
1019   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1021   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1022   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1023   int          *cmap,*idx_p;
1024 
1025   PetscFunctionBegin;
1026   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1027   mat->getrowactive = PETSC_TRUE;
1028 
1029   if (!mat->rowvalues && (idx || v)) {
1030     /*
1031         allocate enough space to hold information from the longest row.
1032     */
1033     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1034     int     max = 1,tmp;
1035     for (i=0; i<matin->m; i++) {
1036       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1037       if (max < tmp) { max = tmp; }
1038     }
1039     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1040     mat->rowindices = (int*)(mat->rowvalues + max);
1041   }
1042 
1043   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1044   lrow = row - rstart;
1045 
1046   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1047   if (!v)   {pvA = 0; pvB = 0;}
1048   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1049   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1050   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1051   nztot = nzA + nzB;
1052 
1053   cmap  = mat->garray;
1054   if (v  || idx) {
1055     if (nztot) {
1056       /* Sort by increasing column numbers, assuming A and B already sorted */
1057       int imark = -1;
1058       if (v) {
1059         *v = v_p = mat->rowvalues;
1060         for (i=0; i<nzB; i++) {
1061           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1062           else break;
1063         }
1064         imark = i;
1065         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1066         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1067       }
1068       if (idx) {
1069         *idx = idx_p = mat->rowindices;
1070         if (imark > -1) {
1071           for (i=0; i<imark; i++) {
1072             idx_p[i] = cmap[cworkB[i]];
1073           }
1074         } else {
1075           for (i=0; i<nzB; i++) {
1076             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1077             else break;
1078           }
1079           imark = i;
1080         }
1081         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1082         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1083       }
1084     } else {
1085       if (idx) *idx = 0;
1086       if (v)   *v   = 0;
1087     }
1088   }
1089   *nz = nztot;
1090   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1091   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1097 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1098 {
1099   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1100 
1101   PetscFunctionBegin;
1102   if (aij->getrowactive == PETSC_FALSE) {
1103     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1104   }
1105   aij->getrowactive = PETSC_FALSE;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatNorm_MPIAIJ"
1111 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1112 {
1113   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1114   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1115   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1116   PetscReal    sum = 0.0;
1117   PetscScalar  *v;
1118 
1119   PetscFunctionBegin;
1120   if (aij->size == 1) {
1121     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1122   } else {
1123     if (type == NORM_FROBENIUS) {
1124       v = amat->a;
1125       for (i=0; i<amat->nz; i++) {
1126 #if defined(PETSC_USE_COMPLEX)
1127         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1128 #else
1129         sum += (*v)*(*v); v++;
1130 #endif
1131       }
1132       v = bmat->a;
1133       for (i=0; i<bmat->nz; i++) {
1134 #if defined(PETSC_USE_COMPLEX)
1135         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1136 #else
1137         sum += (*v)*(*v); v++;
1138 #endif
1139       }
1140       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1141       *norm = sqrt(*norm);
1142     } else if (type == NORM_1) { /* max column norm */
1143       PetscReal *tmp,*tmp2;
1144       int    *jj,*garray = aij->garray;
1145       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1146       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1147       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1148       *norm = 0.0;
1149       v = amat->a; jj = amat->j;
1150       for (j=0; j<amat->nz; j++) {
1151         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1152       }
1153       v = bmat->a; jj = bmat->j;
1154       for (j=0; j<bmat->nz; j++) {
1155         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1156       }
1157       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1158       for (j=0; j<mat->N; j++) {
1159         if (tmp2[j] > *norm) *norm = tmp2[j];
1160       }
1161       ierr = PetscFree(tmp);CHKERRQ(ierr);
1162       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1163     } else if (type == NORM_INFINITY) { /* max row norm */
1164       PetscReal ntemp = 0.0;
1165       for (j=0; j<aij->A->m; j++) {
1166         v = amat->a + amat->i[j] + shift;
1167         sum = 0.0;
1168         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1169           sum += PetscAbsScalar(*v); v++;
1170         }
1171         v = bmat->a + bmat->i[j] + shift;
1172         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1173           sum += PetscAbsScalar(*v); v++;
1174         }
1175         if (sum > ntemp) ntemp = sum;
1176       }
1177       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1178     } else {
1179       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1180     }
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPIAIJ"
1187 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1188 {
1189   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1190   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1191   int          ierr,shift = Aloc->indexshift;
1192   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1193   Mat          B;
1194   PetscScalar  *array;
1195 
1196   PetscFunctionBegin;
1197   if (!matout && M != N) {
1198     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1199   }
1200 
1201   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1202 
1203   /* copy over the A part */
1204   Aloc = (Mat_SeqAIJ*)a->A->data;
1205   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1206   row = a->rstart;
1207   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1208   for (i=0; i<m; i++) {
1209     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1210     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1211   }
1212   aj = Aloc->j;
1213   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1214 
1215   /* copy over the B part */
1216   Aloc = (Mat_SeqAIJ*)a->B->data;
1217   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1218   row  = a->rstart;
1219   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1220   ct   = cols;
1221   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1222   for (i=0; i<m; i++) {
1223     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1224     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1225   }
1226   ierr = PetscFree(ct);CHKERRQ(ierr);
1227   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   if (matout) {
1230     *matout = B;
1231   } else {
1232     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1239 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1240 {
1241   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1242   Mat        a = aij->A,b = aij->B;
1243   int        ierr,s1,s2,s3;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1247   if (rr) {
1248     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1249     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1250     /* Overlap communication with computation. */
1251     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1252   }
1253   if (ll) {
1254     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1255     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1256     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1257   }
1258   /* scale  the diagonal block */
1259   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1260 
1261   if (rr) {
1262     /* Do a scatter end and then right scale the off-diagonal block */
1263     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1264     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1265   }
1266 
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1273 int MatPrintHelp_MPIAIJ(Mat A)
1274 {
1275   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1276   int        ierr;
1277 
1278   PetscFunctionBegin;
1279   if (!a->rank) {
1280     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1287 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1288 {
1289   PetscFunctionBegin;
1290   *bs = 1;
1291   PetscFunctionReturn(0);
1292 }
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1295 int MatSetUnfactored_MPIAIJ(Mat A)
1296 {
1297   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1298   int        ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatEqual_MPIAIJ"
1307 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1308 {
1309   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1310   Mat        a,b,c,d;
1311   PetscTruth flg;
1312   int        ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1316   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1317   a = matA->A; b = matA->B;
1318   c = matB->A; d = matB->B;
1319 
1320   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1321   if (flg == PETSC_TRUE) {
1322     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1323   }
1324   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatCopy_MPIAIJ"
1330 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1331 {
1332   int        ierr;
1333   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1334   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1335   PetscTruth flg;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1339   if (str != SAME_NONZERO_PATTERN || !flg) {
1340     /* because of the column compression in the off-processor part of the matrix a->B,
1341        the number of columns in a->B and b->B may be different, hence we cannot call
1342        MatCopy() directly on the two parts. If need be, we can provide a more
1343        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1344        then copying the submatrices */
1345     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1348     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1355 int MatSetUpPreallocation_MPIAIJ(Mat A)
1356 {
1357   int        ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1365 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1366 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1367 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1368 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1369 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1370 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1371 #endif
1372 
1373 #include "petscblaslapack.h"
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatAXPY_MPIAIJ"
1377 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1378 {
1379   int        ierr,one = 1;
1380   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1381   Mat_SeqAIJ *x,*y;
1382 
1383   PetscFunctionBegin;
1384   if (str == SAME_NONZERO_PATTERN) {
1385     x  = (Mat_SeqAIJ *)xx->A->data;
1386     y  = (Mat_SeqAIJ *)yy->A->data;
1387     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1388     x  = (Mat_SeqAIJ *)xx->B->data;
1389     y  = (Mat_SeqAIJ *)yy->B->data;
1390     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1391   } else {
1392     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1393   }
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 /* -------------------------------------------------------------------*/
1398 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1399        MatGetRow_MPIAIJ,
1400        MatRestoreRow_MPIAIJ,
1401        MatMult_MPIAIJ,
1402        MatMultAdd_MPIAIJ,
1403        MatMultTranspose_MPIAIJ,
1404        MatMultTransposeAdd_MPIAIJ,
1405        0,
1406        0,
1407        0,
1408        0,
1409        0,
1410        0,
1411        MatRelax_MPIAIJ,
1412        MatTranspose_MPIAIJ,
1413        MatGetInfo_MPIAIJ,
1414        MatEqual_MPIAIJ,
1415        MatGetDiagonal_MPIAIJ,
1416        MatDiagonalScale_MPIAIJ,
1417        MatNorm_MPIAIJ,
1418        MatAssemblyBegin_MPIAIJ,
1419        MatAssemblyEnd_MPIAIJ,
1420        0,
1421        MatSetOption_MPIAIJ,
1422        MatZeroEntries_MPIAIJ,
1423        MatZeroRows_MPIAIJ,
1424 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1425        MatLUFactorSymbolic_MPIAIJ_TFS,
1426 #else
1427        0,
1428 #endif
1429        0,
1430        0,
1431        0,
1432        MatSetUpPreallocation_MPIAIJ,
1433        0,
1434        0,
1435        0,
1436        0,
1437        MatDuplicate_MPIAIJ,
1438        0,
1439        0,
1440        0,
1441        0,
1442        MatAXPY_MPIAIJ,
1443        MatGetSubMatrices_MPIAIJ,
1444        MatIncreaseOverlap_MPIAIJ,
1445        MatGetValues_MPIAIJ,
1446        MatCopy_MPIAIJ,
1447        MatPrintHelp_MPIAIJ,
1448        MatScale_MPIAIJ,
1449        0,
1450        0,
1451        0,
1452        MatGetBlockSize_MPIAIJ,
1453        0,
1454        0,
1455        0,
1456        0,
1457        MatFDColoringCreate_MPIAIJ,
1458        0,
1459        MatSetUnfactored_MPIAIJ,
1460        0,
1461        0,
1462        MatGetSubMatrix_MPIAIJ,
1463        MatDestroy_MPIAIJ,
1464        MatView_MPIAIJ,
1465        MatGetPetscMaps_Petsc,
1466        0,
1467        0,
1468        0,
1469        0,
1470        0,
1471        0,
1472        0,
1473        0,
1474        MatSetColoring_MPIAIJ,
1475        MatSetValuesAdic_MPIAIJ,
1476        MatSetValuesAdifor_MPIAIJ
1477 };
1478 
1479 /* ----------------------------------------------------------------------------------------*/
1480 
1481 EXTERN_C_BEGIN
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1484 int MatStoreValues_MPIAIJ(Mat mat)
1485 {
1486   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1487   int        ierr;
1488 
1489   PetscFunctionBegin;
1490   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1491   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 EXTERN_C_END
1495 
1496 EXTERN_C_BEGIN
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1499 int MatRetrieveValues_MPIAIJ(Mat mat)
1500 {
1501   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1502   int        ierr;
1503 
1504   PetscFunctionBegin;
1505   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1506   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 EXTERN_C_END
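
/*
   Usage sketch (illustrative, not taken from this file): the store/retrieve pair
   is meant for repeatedly refilling a matrix whose nonzero pattern is fixed, e.g.

       ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(A);CHKERRQ(ierr);
       ...  modify some entries of A and use it  ...
       ierr = MatRetrieveValues(A);CHKERRQ(ierr);   /* restore the stored values */
*/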
1510 
1511 #include "petscpc.h"
1512 EXTERN_C_BEGIN
1513 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1514 EXTERN_C_END
1515 
1516 EXTERN_C_BEGIN
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatCreate_MPIAIJ"
1519 int MatCreate_MPIAIJ(Mat B)
1520 {
1521   Mat_MPIAIJ *b;
1522   int        ierr,i,size;
1523 
1524   PetscFunctionBegin;
1525   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1526 
1527   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1528   B->data         = (void*)b;
1529   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1530   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1531   B->factor       = 0;
1532   B->assembled    = PETSC_FALSE;
1533   B->mapping      = 0;
1534 
1535   B->insertmode      = NOT_SET_VALUES;
1536   b->size            = size;
1537   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1538 
1539   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1540   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1541 
1542   /* the information in the maps duplicates the information computed below; eventually
1543      we should remove the duplicated information that is kept outside the maps */
1544   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1545   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1546 
1547   /* build local table of row and column ownerships */
1548   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1549   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1550   b->cowners = b->rowners + b->size + 2;
1551   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1552   b->rowners[0] = 0;
1553   for (i=2; i<=b->size; i++) {
1554     b->rowners[i] += b->rowners[i-1];
1555   }
1556   b->rstart = b->rowners[b->rank];
1557   b->rend   = b->rowners[b->rank+1];
1558   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1559   b->cowners[0] = 0;
1560   for (i=2; i<=b->size; i++) {
1561     b->cowners[i] += b->cowners[i-1];
1562   }
1563   b->cstart = b->cowners[b->rank];
1564   b->cend   = b->cowners[b->rank+1];
1565 
1566   /* build cache for off array entries formed */
1567   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1568   b->donotstash  = PETSC_FALSE;
1569   b->colmap      = 0;
1570   b->garray      = 0;
1571   b->roworiented = PETSC_TRUE;
1572 
1573   /* stuff used for matrix vector multiply */
1574   b->lvec      = PETSC_NULL;
1575   b->Mvctx     = PETSC_NULL;
1576 
1577   /* stuff for MatGetRow() */
1578   b->rowindices   = 0;
1579   b->rowvalues    = 0;
1580   b->getrowactive = PETSC_FALSE;
1581 
1582 /* #if defined(PETSC_HAVE_SUPERLUDIST)
1583 
1584   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1585   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1586 #endif */
1587 
1588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1589                                      "MatStoreValues_MPIAIJ",
1590                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1592                                      "MatRetrieveValues_MPIAIJ",
1593                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1595                                      "MatGetDiagonalBlock_MPIAIJ",
1596                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1597 
1598   PetscFunctionReturn(0);
1599 }
1600 EXTERN_C_END
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1604 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1605 {
1606   Mat        mat;
1607   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1608   int        ierr;
1609 
1610   PetscFunctionBegin;
1611   *newmat       = 0;
1612   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1613   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1614   a    = (Mat_MPIAIJ*)mat->data;
1615   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1616   mat->factor       = matin->factor;
1617   mat->assembled    = PETSC_TRUE;
1618   mat->insertmode   = NOT_SET_VALUES;
1619   mat->preallocated = PETSC_TRUE;
1620 
1621   a->rstart       = oldmat->rstart;
1622   a->rend         = oldmat->rend;
1623   a->cstart       = oldmat->cstart;
1624   a->cend         = oldmat->cend;
1625   a->size         = oldmat->size;
1626   a->rank         = oldmat->rank;
1627   a->donotstash   = oldmat->donotstash;
1628   a->roworiented  = oldmat->roworiented;
1629   a->rowindices   = 0;
1630   a->rowvalues    = 0;
1631   a->getrowactive = PETSC_FALSE;
1632 
1633   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1634   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1635   if (oldmat->colmap) {
1636 #if defined (PETSC_USE_CTABLE)
1637     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1638 #else
1639     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1640     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1641     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1642 #endif
1643   } else a->colmap = 0;
1644   if (oldmat->garray) {
1645     int len;
1646     len  = oldmat->B->n;
1647     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1648     PetscLogObjectMemory(mat,len*sizeof(int));
1649     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1650   } else a->garray = 0;
1651 
1652   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1653   PetscLogObjectParent(mat,a->lvec);
1654   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1655   PetscLogObjectParent(mat,a->Mvctx);
1656   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1657   PetscLogObjectParent(mat,a->A);
1658   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1659   PetscLogObjectParent(mat,a->B);
1660   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1661   *newmat = mat;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #include "petscsys.h"
1666 
1667 EXTERN_C_BEGIN
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatLoad_MPIAIJ"
1670 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1671 {
1672   Mat          A;
1673   PetscScalar  *vals,*svals;
1674   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1675   MPI_Status   status;
1676   int          i,nz,ierr,j,rstart,rend,fd;
1677   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1678   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1679   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1680 
1681   PetscFunctionBegin;
1682   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1683   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1684   if (!rank) {
1685     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1686     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1687     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1688     if (header[3] < 0) {
1689       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1690     }
1691   }
1692 
1693   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1694   M = header[1]; N = header[2];
1695   /* determine ownership of all rows */
1696   m = M/size + ((M % size) > rank);
1697   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1698   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1699   rowners[0] = 0;
1700   for (i=2; i<=size; i++) {
1701     rowners[i] += rowners[i-1];
1702   }
1703   rstart = rowners[rank];
1704   rend   = rowners[rank+1];
1705 
1706   /* distribute row lengths to all processors */
1707   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1708   offlens = ourlens + (rend-rstart);
1709   if (!rank) {
1710     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1711     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1712     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1713     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1714     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1715     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1716   } else {
1717     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1718   }
1719 
1720   if (!rank) {
1721     /* calculate the number of nonzeros on each processor */
1722     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1723     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1724     for (i=0; i<size; i++) {
1725       for (j=rowners[i]; j< rowners[i+1]; j++) {
1726         procsnz[i] += rowlengths[j];
1727       }
1728     }
1729     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1730 
1731     /* determine max buffer needed and allocate it */
1732     maxnz = 0;
1733     for (i=0; i<size; i++) {
1734       maxnz = PetscMax(maxnz,procsnz[i]);
1735     }
1736     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1737 
1738     /* read in my part of the matrix column indices  */
1739     nz   = procsnz[0];
1740     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1741     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1742 
1743     /* read in everyone else's and ship them off */
1744     for (i=1; i<size; i++) {
1745       nz   = procsnz[i];
1746       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1747       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1748     }
1749     ierr = PetscFree(cols);CHKERRQ(ierr);
1750   } else {
1751     /* determine buffer space needed for message */
1752     nz = 0;
1753     for (i=0; i<m; i++) {
1754       nz += ourlens[i];
1755     }
1756     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1757 
1758     /* receive message of column indices*/
1759     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1760     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1761     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1762   }
1763 
1764   /* determine column ownership if matrix is not square */
1765   if (N != M) {
1766     n      = N/size + ((N % size) > rank);
1767     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1768     cstart = cend - n;
1769   } else {
1770     cstart = rstart;
1771     cend   = rend;
1772     n      = cend - cstart;
1773   }
1774 
1775   /* loop over local rows, determining number of off diagonal entries */
1776   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1777   jj = 0;
1778   for (i=0; i<m; i++) {
1779     for (j=0; j<ourlens[i]; j++) {
1780       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1781       jj++;
1782     }
1783   }
1784 
1785   /* create our matrix */
1786   for (i=0; i<m; i++) {
1787     ourlens[i] -= offlens[i];
1788   }
1789   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1790   A = *newmat;
1791   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1792   for (i=0; i<m; i++) {
1793     ourlens[i] += offlens[i];
1794   }
1795 
1796   if (!rank) {
1797     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1798 
1799     /* read in my part of the matrix numerical values  */
1800     nz   = procsnz[0];
1801     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1802 
1803     /* insert into matrix */
1804     jj      = rstart;
1805     smycols = mycols;
1806     svals   = vals;
1807     for (i=0; i<m; i++) {
1808       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1809       smycols += ourlens[i];
1810       svals   += ourlens[i];
1811       jj++;
1812     }
1813 
1814     /* read in other processors and ship out */
1815     for (i=1; i<size; i++) {
1816       nz   = procsnz[i];
1817       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1818       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1819     }
1820     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1821   } else {
1822     /* receive numeric values */
1823     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1824 
1825     /* receive message of values*/
1826     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1827     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1828     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1829 
1830     /* insert into matrix */
1831     jj      = rstart;
1832     smycols = mycols;
1833     svals   = vals;
1834     for (i=0; i<m; i++) {
1835       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1836       smycols += ourlens[i];
1837       svals   += ourlens[i];
1838       jj++;
1839     }
1840   }
1841   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1842   ierr = PetscFree(vals);CHKERRQ(ierr);
1843   ierr = PetscFree(mycols);CHKERRQ(ierr);
1844   ierr = PetscFree(rowners);CHKERRQ(ierr);
1845 
1846   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1847   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 EXTERN_C_END
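
/*
   Usage sketch (illustrative; PetscViewerBinaryOpen() and its open-mode constant
   are assumed from the binary viewer interface of this release, not taken from
   this file):

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/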
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1854 /*
1855     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1856   copy on each process, and then the end result obtained by concatenating the
1857   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1858 */
1859 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1860 {
1861   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1862   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1863   Mat          *local,M,Mreuse;
1864   PetscScalar  *vwork,*aa;
1865   MPI_Comm     comm = mat->comm;
1866   Mat_SeqAIJ   *aij;
1867 
1868 
1869   PetscFunctionBegin;
1870   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1871   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1872 
1873   if (call ==  MAT_REUSE_MATRIX) {
1874     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1875     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1876     local = &Mreuse;
1877     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1878   } else {
1879     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1880     Mreuse = *local;
1881     ierr   = PetscFree(local);CHKERRQ(ierr);
1882   }
1883 
1884   /*
1885       m - number of local rows
1886       n - number of columns (same on all processors)
1887       rstart - first row in new global matrix generated
1888   */
1889   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1890   if (call == MAT_INITIAL_MATRIX) {
1891     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1892     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1893     ii  = aij->i;
1894     jj  = aij->j;
1895 
1896     /*
1897         Determine the number of non-zeros in the diagonal and off-diagonal
1898         portions of the matrix in order to do correct preallocation
1899     */
1900 
1901     /* first get start and end of "diagonal" columns */
1902     if (csize == PETSC_DECIDE) {
1903       nlocal = n/size + ((n % size) > rank);
1904     } else {
1905       nlocal = csize;
1906     }
1907     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1908     rstart = rend - nlocal;
1909     if (rank == size - 1 && rend != n) {
1910       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1911     }
1912 
1913     /* next, compute all the lengths */
1914     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1915     olens = dlens + m;
1916     for (i=0; i<m; i++) {
1917       jend = ii[i+1] - ii[i];
1918       olen = 0;
1919       dlen = 0;
1920       for (j=0; j<jend; j++) {
1921         if (*jj < rstart || *jj >= rend) olen++;
1922         else dlen++;
1923         jj++;
1924       }
1925       olens[i] = olen;
1926       dlens[i] = dlen;
1927     }
1928     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1929     ierr = PetscFree(dlens);CHKERRQ(ierr);
1930   } else {
1931     int ml,nl;
1932 
1933     M = *newmat;
1934     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1935     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1936     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1937     /*
1938          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1939        rather than the slower MatSetValues().
1940     */
1941     M->was_assembled = PETSC_TRUE;
1942     M->assembled     = PETSC_FALSE;
1943   }
1944   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1945   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1946   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1947   ii  = aij->i;
1948   jj  = aij->j;
1949   aa  = aij->a;
1950   for (i=0; i<m; i++) {
1951     row   = rstart + i;
1952     nz    = ii[i+1] - ii[i];
1953     cwork = jj;     jj += nz;
1954     vwork = aa;     aa += nz;
1955     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1956   }
1957 
1958   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1959   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1960   *newmat = M;
1961 
1962   /* save submatrix used in processor for next request */
1963   if (call ==  MAT_INITIAL_MATRIX) {
1964     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1965     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1966   }
1967 
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1973 /*@C
1974    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
1975    (the default parallel PETSc format).  For good matrix assembly performance
1976    the user should preallocate the matrix storage by setting the parameters
1977    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1978    performance can be increased by more than a factor of 50.
1979 
1980    Collective on MPI_Comm
1981 
1982    Input Parameters:
1983 +  A - the matrix
1984 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1985            (same value is used for all local rows)
1986 .  d_nnz - array containing the number of nonzeros in the various rows of the
1987            DIAGONAL portion of the local submatrix (possibly different for each row)
1988            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1989            The size of this array is equal to the number of local rows, i.e 'm'.
1990            The size of this array is equal to the number of local rows, i.e. 'm'.
1991 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1992            submatrix (same value is used for all local rows).
1993 -  o_nnz - array containing the number of nonzeros in the various rows of the
1994            OFF-DIAGONAL portion of the local submatrix (possibly different for
1995            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1996            structure. The size of this array is equal to the number
1997            of local rows, i.e. 'm'.
1998 
1999    The AIJ format (also called the Yale sparse matrix format or
2000    compressed row storage), is fully compatible with standard Fortran 77
2001    storage.  That is, the stored row and column indices can begin at
2002    either one (as in Fortran) or zero.  See the users manual for details.
2003 
2004    The user MUST specify either the local or global matrix dimensions
2005    (possibly both).
2006 
2007    The parallel matrix is partitioned such that the first m0 rows belong to
2008    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2009    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2010 
2011    The DIAGONAL portion of the local submatrix of a processor can be defined
2012    as the submatrix which is obtained by extracting the part corresponding
2013    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2014    first row that belongs to the processor, and r2 is the last row belonging
2015    to this processor. This is a square mxm matrix. The remaining portion
2016    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2017 
2018    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2019 
2020    By default, this format uses inodes (identical nodes) when possible.
2021    We search for consecutive rows with the same nonzero structure, thereby
2022    reusing matrix information to achieve increased efficiency.
2023 
2024    Options Database Keys:
2025 +  -mat_aij_no_inode  - Do not use inodes
2026 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2027 -  -mat_aij_oneindex - Internally use indexing starting at 1
2028         rather than 0.  Note that when calling MatSetValues(),
2029         the user still MUST index entries starting at 0!
2030 
2031    Example usage:
2032 
2033    Consider the following 8x8 matrix with 34 non-zero values, that is
2034    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2035    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2036    as follows:
2037 
2038 .vb
2039             1  2  0  |  0  3  0  |  0  4
2040     Proc0   0  5  6  |  7  0  0  |  8  0
2041             9  0 10  | 11  0  0  | 12  0
2042     -------------------------------------
2043            13  0 14  | 15 16 17  |  0  0
2044     Proc1   0 18  0  | 19 20 21  |  0  0
2045             0  0  0  | 22 23  0  | 24  0
2046     -------------------------------------
2047     Proc2  25 26 27  |  0  0 28  | 29  0
2048            30  0  0  | 31 32 33  |  0 34
2049 .ve
2050 
2051    This can be represented as a collection of submatrices as:
2052 
2053 .vb
2054       A B C
2055       D E F
2056       G H I
2057 .ve
2058 
2059    Where the submatrices A,B,C are owned by proc0, D,E,F are
2060    owned by proc1, G,H,I are owned by proc2.
2061 
2062    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2063    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2064    The 'M','N' parameters are 8,8, and have the same values on all procs.
2065 
2066    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2067    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2068    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2069    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2070    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2071    matrix, and [DF] as another SeqAIJ matrix.
2072 
2073    When d_nz, o_nz parameters are specified, d_nz storage elements are
2074    allocated for every row of the local diagonal submatrix, and o_nz
2075    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2076    One way to choose d_nz and o_nz is to use the max nonzeros per local
2077    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2078    In this case, the values of d_nz,o_nz are:
2079 .vb
2080      proc0 : d_nz = 2, o_nz = 2
2081      proc1 : d_nz = 3, o_nz = 2
2082      proc2 : d_nz = 1, o_nz = 4
2083 .ve
2084    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2085    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2086    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2087    34 values.
2088 
2089    When d_nnz, o_nnz parameters are specified, the storage is specified
2090    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2091    In the above case the values for d_nnz,o_nnz are:
2092 .vb
2093      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2094      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2095      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2096 .ve
2097    Here the space allocated is the sum of all the above values, i.e. 34, and
2098    hence pre-allocation is perfect.
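
   A minimal call sequence for proc0 of the example above (an illustrative
   sketch, not part of the original man page; A and ierr are assumed to be
   declared by the caller, and each process passes its own local sizes and
   nonzero counts):

.vb
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve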
2099 
2100    Level: intermediate
2101 
2102 .keywords: matrix, aij, compressed row, sparse, parallel
2103 
2104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2105 @*/
2106 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2107 {
2108   Mat_MPIAIJ   *b;
2109   int          ierr,i;
2110   PetscTruth   flg2;
2111 
2112   PetscFunctionBegin;
2113   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2114   if (!flg2) PetscFunctionReturn(0);
2115   B->preallocated = PETSC_TRUE;
2116   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2117   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2118   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2119   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2120   if (d_nnz) {
2121     for (i=0; i<B->m; i++) {
2122       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2123     }
2124   }
2125   if (o_nnz) {
2126     for (i=0; i<B->m; i++) {
2127       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2128     }
2129   }
2130   b = (Mat_MPIAIJ*)B->data;
2131 
2132   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2133   PetscLogObjectParent(B,b->A);
2134   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2135   PetscLogObjectParent(B,b->B);
2136 
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatCreateMPIAIJ"
2142 /*@C
2143    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2144    (the default parallel PETSc format).  For good matrix assembly performance
2145    the user should preallocate the matrix storage by setting the parameters
2146    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2147    performance can be increased by more than a factor of 50.
2148 
2149    Collective on MPI_Comm
2150 
2151    Input Parameters:
2152 +  comm - MPI communicator
2153 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2154            This value should be the same as the local size used in creating the
2155            y vector for the matrix-vector product y = Ax.
2156 .  n - This value should be the same as the local size used in creating the
2157        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2158        calculated if N is given) For square matrices n is almost always m.
2159 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2160 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2161 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2162            (same value is used for all local rows)
2163 .  d_nnz - array containing the number of nonzeros in the various rows of the
2164            DIAGONAL portion of the local submatrix (possibly different for each row)
2165            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2166            The size of this array is equal to the number of local rows, i.e. 'm'.
2167            You must leave room for the diagonal entry even if it is zero.
2168 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2169            submatrix (same value is used for all local rows).
2170 -  o_nnz - array containing the number of nonzeros in the various rows of the
2171            OFF-DIAGONAL portion of the local submatrix (possibly different for
2172            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2173            structure. The size of this array is equal to the number
2174            of local rows, i.e. 'm'.
2175 
2176    Output Parameter:
2177 .  A - the matrix
2178 
2179    Notes:
2180    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2181    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2182    storage requirements for this matrix.
2183 
2184    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2185    processor then it must be used on all processors that share the object for
2186    that argument.
2187 
2188    The AIJ format (also called the Yale sparse matrix format or
2189    compressed row storage), is fully compatible with standard Fortran 77
2190    storage.  That is, the stored row and column indices can begin at
2191    either one (as in Fortran) or zero.  See the users manual for details.
2192 
2193    The user MUST specify either the local or global matrix dimensions
2194    (possibly both).
2195 
2196    The parallel matrix is partitioned such that the first m0 rows belong to
2197    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2198    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2199 
2200    The DIAGONAL portion of the local submatrix of a processor can be defined
2201    as the submatrix which is obtained by extracting the part corresponding
2202    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2203    first row that belongs to the processor, and r2 is the last row belonging
2204    to this processor. This is a square mxm matrix. The remaining portion
2205    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2206 
2207    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2208 
2209    By default, this format uses inodes (identical nodes) when possible.
2210    We search for consecutive rows with the same nonzero structure, thereby
2211    reusing matrix information to achieve increased efficiency.
2212 
2213    Options Database Keys:
2214 +  -mat_aij_no_inode  - Do not use inodes
2215 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2216 -  -mat_aij_oneindex - Internally use indexing starting at 1
2217         rather than 0.  Note that when calling MatSetValues(),
2218         the user still MUST index entries starting at 0!
2219 
2220 
2221    Example usage:
2222 
2223    Consider the following 8x8 matrix with 34 non-zero values, that is
2224    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2225    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2226    as follows:
2227 
2228 .vb
2229             1  2  0  |  0  3  0  |  0  4
2230     Proc0   0  5  6  |  7  0  0  |  8  0
2231             9  0 10  | 11  0  0  | 12  0
2232     -------------------------------------
2233            13  0 14  | 15 16 17  |  0  0
2234     Proc1   0 18  0  | 19 20 21  |  0  0
2235             0  0  0  | 22 23  0  | 24  0
2236     -------------------------------------
2237     Proc2  25 26 27  |  0  0 28  | 29  0
2238            30  0  0  | 31 32 33  |  0 34
2239 .ve
2240 
2241    This can be represented as a collection of submatrices as:
2242 
2243 .vb
2244       A B C
2245       D E F
2246       G H I
2247 .ve
2248 
2249    Where the submatrices A,B,C are owned by proc0, D,E,F are
2250    owned by proc1, G,H,I are owned by proc2.
2251 
2252    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2253    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2254    The 'M','N' parameters are 8,8, and have the same values on all procs.
2255 
2256    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2257    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2258    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2259    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2260    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2261    matrix, and [DF] as another SeqAIJ matrix.
2262 
2263    When d_nz, o_nz parameters are specified, d_nz storage elements are
2264    allocated for every row of the local diagonal submatrix, and o_nz
2265    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2266    One way to choose d_nz and o_nz is to use the max nonzeros per local
2267    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2268    In this case, the values of d_nz,o_nz are:
2269 .vb
2270      proc0 : d_nz = 2, o_nz = 2
2271      proc1 : d_nz = 3, o_nz = 2
2272      proc2 : d_nz = 1, o_nz = 4
2273 .ve
2274    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2275    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2276    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2277    34 values.
2278 
2279    When d_nnz, o_nnz parameters are specified, the storage is specified
2280    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2281    In the above case the values for d_nnz,o_nnz are:
2282 .vb
2283      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2284      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2285      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2286 .ve
2287    Here the space allocated is the sum of all the above values, i.e. 34, and
2288    hence pre-allocation is perfect.
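
   A minimal call for proc0 of the example above (an illustrative sketch, not
   part of the original man page; each process passes its own 'm', 'n' and
   nonzero counts, and A and ierr are assumed to be declared by the caller):

.vb
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve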
2289 
2290    Level: intermediate
2291 
2292 .keywords: matrix, aij, compressed row, sparse, parallel
2293 
2294 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2295 @*/
2296 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2297 {
2298   int ierr,size;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2302   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2303   if (size > 1) {
2304     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2305     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2306   } else {
2307     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2308     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2309   }
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2315 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2316 {
2317   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2318   PetscFunctionBegin;
2319   *Ad     = a->A;
2320   *Ao     = a->B;
2321   *colmap = a->garray;
2322   PetscFunctionReturn(0);
2323 }
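
/*
   Usage sketch (illustrative, not taken from this file): for an assembled MPIAIJ
   matrix A the diagonal block, the off-diagonal block, and the local-to-global
   column map of the off-diagonal block can be obtained with

       Mat Ad,Ao;
       int *garray;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   where garray[j] is the global column number of local column j of Ao.
*/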
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2327 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2328 {
2329   int        ierr;
2330   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2331 
2332   PetscFunctionBegin;
2333   if (coloring->ctype == IS_COLORING_LOCAL) {
2334     int        *allcolors,*colors,i;
2335     ISColoring ocoloring;
2336 
2337     /* set coloring for diagonal portion */
2338     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2339 
2340     /* set coloring for off-diagonal portion */
2341     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2342     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2343     for (i=0; i<a->B->n; i++) {
2344       colors[i] = allcolors[a->garray[i]];
2345     }
2346     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2347     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2348     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2349     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2350   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2351     int        *colors,i,*larray;
2352     ISColoring ocoloring;
2353 
2354     /* set coloring for diagonal portion */
2355     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2356     for (i=0; i<a->A->n; i++) {
2357       larray[i] = i + a->cstart;
2358     }
2359     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2360     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2361     for (i=0; i<a->A->n; i++) {
2362       colors[i] = coloring->colors[larray[i]];
2363     }
2364     ierr = PetscFree(larray);CHKERRQ(ierr);
2365     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2366     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2367     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2368 
2369     /* set coloring for off-diagonal portion */
2370     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2371     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2372     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2373     for (i=0; i<a->B->n; i++) {
2374       colors[i] = coloring->colors[larray[i]];
2375     }
2376     ierr = PetscFree(larray);CHKERRQ(ierr);
2377     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2378     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2379     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2380   } else {
2381     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2382   }
2383 
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2389 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2390 {
2391   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2392   int        ierr;
2393 
2394   PetscFunctionBegin;
2395   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2396   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2402 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2403 {
2404   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2405   int        ierr;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2409   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412