xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2b2e770305dd46d7390015fc3b2a8f100c37da8b)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
19 a slightly higher hash-table cost; without it, it is not scalable (each processor
20 holds an order N integer array) but it is fast to access.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
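/*
   Illustrative sketch (not part of the library source): once the colmap
   exists, a global column number gcol that falls in the off-diagonal part
   can be translated to a local column index of aij->B exactly as
   MatSetValues_MPIAIJ() does below; gcol and lcol are hypothetical names.

     int lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                          (the +1/-1 shift lets 0 mean "not found")
   #else
     lcol = aij->colmap[gcol] - 1;    (order N array indexed by global column)
   #endif
     if (lcol < 0) { the column is not (yet) present in the off-diagonal block }
*/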
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
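/*
   Both macros above implement the same row-insertion scheme for one entry
   (row,col,value).  A simplified sketch (illustrative only, C indexing with
   shift = 0 and the reallocation branch summarized in words):

     rp = aj + ai[row]; ap = aa + ai[row];          pointers into the row
     nrow = ailen[row];
     low = 0; high = nrow;
     while (high - low > 5) {                       binary search to narrow the window
       t = (low+high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (_i=low; _i<high; _i++) {                  short linear scan
       if (rp[_i] > col) break;
       if (rp[_i] == col) {
         if (addv == ADD_VALUES) ap[_i] += value; else ap[_i] = value;
         goto noinsert;
       }
     }
     (respect the "no new nonzeros" options; if the row is full, i.e. nrow == rmax,
      malloc new i/j/a arrays enlarged by CHUNKSIZE and copy the old data over)
     shift rp[_i..nrow-1] and ap[_i..nrow-1] up one slot, store col and value at _i,
     and record the new row length in ailen[row].
*/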
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
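/*
   Caller-side sketch (illustrative; the row and column numbers are
   hypothetical and A is a user matrix of type MATMPIAIJ).  Entries whose
   global row is owned by this process are routed above into aij->A or aij->B;
   entries for rows owned elsewhere are stashed and only communicated during
   assembly:

     int         row = 0,col = 5;
     PetscScalar v   = 3.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/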
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col --;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
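/*
   Note on the reduction above: assuming the usual InsertMode ordering
   (NOT_SET_VALUES = 0, INSERT_VALUES = 1, ADD_VALUES = 2), the bitwise OR over
   all processes equals ADD_VALUES|INSERT_VALUES exactly when some processes
   inserted while others added, which is the mixed case being rejected.
   Processes that cached nothing contribute 0 and simply adopt the reduced mode.
*/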
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the data into the send scatter */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal, we use separate
540      code so that MatSetValues() is not called for each diagonal entry,
541      which would otherwise allocate new memory (many mallocs) and slow things down.
542 
543        Contributed by: Mathew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
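/*
   Caller-side sketch (illustrative; the row numbers are hypothetical).  Rows
   are given by their global numbers, and a process may list rows it does not
   own; they are forwarded to the owner by the communication above.

     IS          is;
     int         rows[2] = {0,7};
     PetscScalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);   (pass 0 to leave the diagonal zeroed)
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/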
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
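/*
   The multiply above computes, on each process,
        yy = A_d*xx_local + B_o*lvec
   where A_d (a->A) is the diagonal block, B_o (a->B) is the off-diagonal block
   with compressed column numbering, and lvec holds the ghost values of xx
   gathered by the VecScatter.  The scatter is begun before the local product
   so communication overlaps computation.
*/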
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* added into yy until the VecScatterEnd() on the next line, which is   */
629   /* true for this implementation but may not always be true.             */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
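/*
   For the transpose product the roles are reversed: the off-diagonal
   contribution B_o^T*xx is formed locally in lvec and then scattered back in
   SCATTER_REVERSE mode with ADD_VALUES, so each process accumulates
        yy = A_d^T*xx + (pieces of B_o^T*xx received from the other processes).
   MatMultTransposeAdd_MPIAIJ() below uses the identical pattern with zz.
*/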
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually */
649   /* added into zz until the VecScatterEnd() on the next line, which is   */
650   /* true for this implementation but may not always be true.             */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the subblock A->A is the
657    diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
722   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
723   PetscTruth        isdraw,isascii,flg;
724   PetscViewer       sviewer;
725   PetscViewerFormat format;
726 
727   PetscFunctionBegin;
728   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
730   if (isascii) {
731     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
732     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
733       MatInfo info;
734       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
735       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
737       if (flg) {
738         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
739 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
740       } else {
741         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
742 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
743       }
744       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
749       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
750       PetscFunctionReturn(0);
751     } else if (format == PETSC_VIEWER_ASCII_INFO) {
752       PetscFunctionReturn(0);
753     }
754   } else if (isdraw) {
755     PetscDraw       draw;
756     PetscTruth isnull;
757     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
758     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
759   }
760 
761   if (size == 1) {
762     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
763     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
764   } else {
765     /* assemble the entire matrix onto first processor. */
766     Mat         A;
767     Mat_SeqAIJ *Aloc;
768     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
769     PetscScalar *a;
770 
771     if (!rank) {
772       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     } else {
774       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
775     }
776     PetscLogObjectParent(mat,A);
777 
778     /* copy over the A part */
779     Aloc = (Mat_SeqAIJ*)aij->A->data;
780     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781     row = aij->rstart;
782     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
783     for (i=0; i<m; i++) {
784       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
785       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
786     }
787     aj = Aloc->j;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
789 
790     /* copy over the B part */
791     Aloc = (Mat_SeqAIJ*)aij->B->data;
792     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
793     row  = aij->rstart;
794     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
795     ct   = cols;
796     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
797     for (i=0; i<m; i++) {
798       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800     }
801     ierr = PetscFree(ct);CHKERRQ(ierr);
802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     /*
805        Everyone has to participate in this call to draw the matrix, since the
806        graphics waits are synchronized across all processors that share the PetscDraw object
807     */
808     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
809     if (!rank) {
810       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
811       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
812     }
813     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     ierr = MatDestroy(A);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatView_MPIAIJ"
821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
822 {
823   int        ierr;
824   PetscTruth isascii,isdraw,issocket,isbinary;
825 
826   PetscFunctionBegin;
827   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
828   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
829   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
830   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
831   if (isascii || isdraw || isbinary || issocket) {
832     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
833   } else {
834     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatRelax_MPIAIJ"
843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
844 {
845   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
846   int          ierr;
847   Vec          bb1;
848   PetscScalar  mone=-1.0;
849 
850   PetscFunctionBegin;
851   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
852 
853   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
854 
855   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
856     if (flag & SOR_ZERO_INITIAL_GUESS) {
857       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
858       its--;
859     }
860 
861     while (its--) {
862       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
863       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
864 
865       /* update rhs: bb1 = bb - B*x */
866       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
867       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
868 
869       /* local sweep */
870       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
871       CHKERRQ(ierr);
872     }
873   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
876       its--;
877     }
878     while (its--) {
879       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881 
882       /* update rhs: bb1 = bb - B*x */
883       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
884       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
885 
886       /* local sweep */
887       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888       CHKERRQ(ierr);
889     }
890   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
893       its--;
894     }
895     while (its--) {
896       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898 
899       /* update rhs: bb1 = bb - B*x */
900       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
901       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
902 
903       /* local sweep */
904       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
905       CHKERRQ(ierr);
906     }
907   } else {
908     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
909   }
910 
911   ierr = VecDestroy(bb1);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
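/*
   In every branch above one outer iteration performs a purely local sweep on
   the block system
        A_d*xx = bb - B_o*xx_ghost
   where bb1 holds the right-hand side (built by negating lvec and calling the
   multadd kernel), so off-process couplings are frozen for the sweep; a true
   parallel SOR across processes is not supported.
*/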
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatGetInfo_MPIAIJ"
917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
918 {
919   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
920   Mat        A = mat->A,B = mat->B;
921   int        ierr;
922   PetscReal  isend[5],irecv[5];
923 
924   PetscFunctionBegin;
925   info->block_size     = 1.0;
926   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
927   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
928   isend[3] = info->memory;  isend[4] = info->mallocs;
929   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
930   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
931   isend[3] += info->memory;  isend[4] += info->mallocs;
932   if (flag == MAT_LOCAL) {
933     info->nz_used      = isend[0];
934     info->nz_allocated = isend[1];
935     info->nz_unneeded  = isend[2];
936     info->memory       = isend[3];
937     info->mallocs      = isend[4];
938   } else if (flag == MAT_GLOBAL_MAX) {
939     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
940     info->nz_used      = irecv[0];
941     info->nz_allocated = irecv[1];
942     info->nz_unneeded  = irecv[2];
943     info->memory       = irecv[3];
944     info->mallocs      = irecv[4];
945   } else if (flag == MAT_GLOBAL_SUM) {
946     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
947     info->nz_used      = irecv[0];
948     info->nz_allocated = irecv[1];
949     info->nz_unneeded  = irecv[2];
950     info->memory       = irecv[3];
951     info->mallocs      = irecv[4];
952   }
953   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
954   info->fill_ratio_needed = 0;
955   info->factor_mallocs    = 0;
956   info->rows_global       = (double)matin->M;
957   info->columns_global    = (double)matin->N;
958   info->rows_local        = (double)matin->m;
959   info->columns_local     = (double)matin->N;
960 
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatSetOption_MPIAIJ"
966 int MatSetOption_MPIAIJ(Mat A,MatOption op)
967 {
968   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
969   int        ierr;
970 
971   PetscFunctionBegin;
972   switch (op) {
973   case MAT_NO_NEW_NONZERO_LOCATIONS:
974   case MAT_YES_NEW_NONZERO_LOCATIONS:
975   case MAT_COLUMNS_UNSORTED:
976   case MAT_COLUMNS_SORTED:
977   case MAT_NEW_NONZERO_ALLOCATION_ERR:
978   case MAT_KEEP_ZEROED_ROWS:
979   case MAT_NEW_NONZERO_LOCATION_ERR:
980   case MAT_USE_INODES:
981   case MAT_DO_NOT_USE_INODES:
982   case MAT_IGNORE_ZERO_ENTRIES:
983     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
984     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
985     break;
986   case MAT_ROW_ORIENTED:
987     a->roworiented = PETSC_TRUE;
988     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
989     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
990     break;
991   case MAT_ROWS_SORTED:
992   case MAT_ROWS_UNSORTED:
993   case MAT_YES_NEW_DIAGONALS:
994   case MAT_USE_SINGLE_PRECISION_SOLVES:
995     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
996     break;
997   case MAT_COLUMN_ORIENTED:
998     a->roworiented = PETSC_FALSE;
999     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1000     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1001     break;
1002   case MAT_IGNORE_OFF_PROC_ENTRIES:
1003     a->donotstash = PETSC_TRUE;
1004     break;
1005   case MAT_NO_NEW_DIAGONALS:
1006     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1007     break;
1008   default:
1009     SETERRQ(PETSC_ERR_SUP,"unknown option");
1010     break;
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPIAIJ"
1017 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1018 {
1019   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1021   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1022   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1023   int          *cmap,*idx_p;
1024 
1025   PetscFunctionBegin;
1026   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1027   mat->getrowactive = PETSC_TRUE;
1028 
1029   if (!mat->rowvalues && (idx || v)) {
1030     /*
1031         allocate enough space to hold information from the longest row.
1032     */
1033     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1034     int     max = 1,tmp;
1035     for (i=0; i<matin->m; i++) {
1036       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1037       if (max < tmp) { max = tmp; }
1038     }
1039     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1040     mat->rowindices = (int*)(mat->rowvalues + max);
1041   }
1042 
1043   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1044   lrow = row - rstart;
1045 
1046   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1047   if (!v)   {pvA = 0; pvB = 0;}
1048   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1049   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1050   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1051   nztot = nzA + nzB;
1052 
1053   cmap  = mat->garray;
1054   if (v  || idx) {
1055     if (nztot) {
1056       /* Sort by increasing column numbers, assuming A and B already sorted */
1057       int imark = -1;
1058       if (v) {
1059         *v = v_p = mat->rowvalues;
1060         for (i=0; i<nzB; i++) {
1061           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1062           else break;
1063         }
1064         imark = i;
1065         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1066         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1067       }
1068       if (idx) {
1069         *idx = idx_p = mat->rowindices;
1070         if (imark > -1) {
1071           for (i=0; i<imark; i++) {
1072             idx_p[i] = cmap[cworkB[i]];
1073           }
1074         } else {
1075           for (i=0; i<nzB; i++) {
1076             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1077             else break;
1078           }
1079           imark = i;
1080         }
1081         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1082         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1083       }
1084     } else {
1085       if (idx) *idx = 0;
1086       if (v)   *v   = 0;
1087     }
1088   }
1089   *nz = nztot;
1090   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1091   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
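/*
   Caller-side sketch (illustrative; grow is a hypothetical, locally owned
   global row number).  The returned column indices are global and sorted,
   merged above from the diagonal and off-diagonal blocks:

     int         nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(A,grow,&nz,&cols,&vals);CHKERRQ(ierr);
     (use nz, cols[], vals[])
     ierr = MatRestoreRow(A,grow,&nz,&cols,&vals);CHKERRQ(ierr);
*/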
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1097 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1098 {
1099   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1100 
1101   PetscFunctionBegin;
1102   if (aij->getrowactive == PETSC_FALSE) {
1103     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1104   }
1105   aij->getrowactive = PETSC_FALSE;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatNorm_MPIAIJ"
1111 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1112 {
1113   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1114   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1115   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1116   PetscReal    sum = 0.0;
1117   PetscScalar  *v;
1118 
1119   PetscFunctionBegin;
1120   if (aij->size == 1) {
1121     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1122   } else {
1123     if (type == NORM_FROBENIUS) {
1124       v = amat->a;
1125       for (i=0; i<amat->nz; i++) {
1126 #if defined(PETSC_USE_COMPLEX)
1127         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1128 #else
1129         sum += (*v)*(*v); v++;
1130 #endif
1131       }
1132       v = bmat->a;
1133       for (i=0; i<bmat->nz; i++) {
1134 #if defined(PETSC_USE_COMPLEX)
1135         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1136 #else
1137         sum += (*v)*(*v); v++;
1138 #endif
1139       }
1140       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1141       *norm = sqrt(*norm);
1142     } else if (type == NORM_1) { /* max column norm */
1143       PetscReal *tmp,*tmp2;
1144       int    *jj,*garray = aij->garray;
1145       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1146       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1147       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1148       *norm = 0.0;
1149       v = amat->a; jj = amat->j;
1150       for (j=0; j<amat->nz; j++) {
1151         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1152       }
1153       v = bmat->a; jj = bmat->j;
1154       for (j=0; j<bmat->nz; j++) {
1155         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1156       }
1157       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1158       for (j=0; j<mat->N; j++) {
1159         if (tmp2[j] > *norm) *norm = tmp2[j];
1160       }
1161       ierr = PetscFree(tmp);CHKERRQ(ierr);
1162       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1163     } else if (type == NORM_INFINITY) { /* max row norm */
1164       PetscReal ntemp = 0.0;
1165       for (j=0; j<aij->A->m; j++) {
1166         v = amat->a + amat->i[j] + shift;
1167         sum = 0.0;
1168         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1169           sum += PetscAbsScalar(*v); v++;
1170         }
1171         v = bmat->a + bmat->i[j] + shift;
1172         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1173           sum += PetscAbsScalar(*v); v++;
1174         }
1175         if (sum > ntemp) ntemp = sum;
1176       }
1177       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1178     } else {
1179       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1180     }
1181   }
1182   PetscFunctionReturn(0);
1183 }
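/*
   Summary of the cases above: NORM_FROBENIUS computes sqrt(sum_ij |a_ij|^2)
   by summing the local squares from both blocks and reducing with MPI_SUM;
   NORM_1 (max column sum) accumulates a length-N array of column sums,
   reduces it with MPI_SUM, and then takes the maximum; NORM_INFINITY (max row
   sum) needs only an MPI_MAX reduction because rows are never split across
   processes.
*/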
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPIAIJ"
1187 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1188 {
1189   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1190   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1191   int          ierr,shift = Aloc->indexshift;
1192   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1193   Mat          B;
1194   PetscScalar  *array;
1195 
1196   PetscFunctionBegin;
1197   if (!matout && M != N) {
1198     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1199   }
1200 
1201   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1202 
1203   /* copy over the A part */
1204   Aloc = (Mat_SeqAIJ*)a->A->data;
1205   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1206   row = a->rstart;
1207   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1208   for (i=0; i<m; i++) {
1209     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1210     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1211   }
1212   aj = Aloc->j;
1213   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1214 
1215   /* copy over the B part */
1216   Aloc = (Mat_SeqAIJ*)a->B->data;
1217   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1218   row  = a->rstart;
1219   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1220   ct   = cols;
1221   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1222   for (i=0; i<m; i++) {
1223     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1224     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1225   }
1226   ierr = PetscFree(ct);CHKERRQ(ierr);
1227   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   if (matout) {
1230     *matout = B;
1231   } else {
1232     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1239 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1240 {
1241   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1242   Mat        a = aij->A,b = aij->B;
1243   int        ierr,s1,s2,s3;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1247   if (rr) {
1248     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1249     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1250     /* Overlap communication with computation. */
1251     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1252   }
1253   if (ll) {
1254     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1255     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1256     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1257   }
1258   /* scale  the diagonal block */
1259   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1260 
1261   if (rr) {
1262     /* Do a scatter end and then right scale the off-diagonal block */
1263     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1264     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1265   }
1266 
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1273 int MatPrintHelp_MPIAIJ(Mat A)
1274 {
1275   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1276   int        ierr;
1277 
1278   PetscFunctionBegin;
1279   if (!a->rank) {
1280     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1287 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1288 {
1289   PetscFunctionBegin;
1290   *bs = 1;
1291   PetscFunctionReturn(0);
1292 }
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1295 int MatSetUnfactored_MPIAIJ(Mat A)
1296 {
1297   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1298   int        ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatEqual_MPIAIJ"
1307 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1308 {
1309   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1310   Mat        a,b,c,d;
1311   PetscTruth flg;
1312   int        ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1316   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1317   a = matA->A; b = matA->B;
1318   c = matB->A; d = matB->B;
1319 
1320   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1321   if (flg == PETSC_TRUE) {
1322     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1323   }
1324   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatCopy_MPIAIJ"
1330 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1331 {
1332   int        ierr;
1333   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1334   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1335   PetscTruth flg;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1339   if (str != SAME_NONZERO_PATTERN || !flg) {
1340     /* because of the column compression in the off-processor part of the matrix a->B,
1341        the number of columns in a->B and b->B may be different, hence we cannot call
1342        MatCopy() directly on the two parts. If need be, a more efficient copy than
1343        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1344        then copying the submatrices */
1345     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1348     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1355 int MatSetUpPreallocation_MPIAIJ(Mat A)
1356 {
1357   int        ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1365 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1366 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1367 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1368 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1369 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1370 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1371 #endif
1372 
1373 #include "petscblaslapack.h"
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatAXPY_MPIAIJ"
1377 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1378 {
1379   int        ierr,one = 1;
1380   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1381   Mat_SeqAIJ *x,*y;
1382 
1383   PetscFunctionBegin;
1384   if (str == SAME_NONZERO_PATTERN) {
1385     x  = (Mat_SeqAIJ *)xx->A->data;
1386     y  = (Mat_SeqAIJ *)yy->A->data;
1387     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1388     x  = (Mat_SeqAIJ *)xx->B->data;
1389     y  = (Mat_SeqAIJ *)yy->B->data;
1390     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1391   } else {
1392     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1393   }
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 /* -------------------------------------------------------------------*/
1398 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1399        MatGetRow_MPIAIJ,
1400        MatRestoreRow_MPIAIJ,
1401        MatMult_MPIAIJ,
1402        MatMultAdd_MPIAIJ,
1403        MatMultTranspose_MPIAIJ,
1404        MatMultTransposeAdd_MPIAIJ,
1405        0,
1406        0,
1407        0,
1408        0,
1409        0,
1410        0,
1411        MatRelax_MPIAIJ,
1412        MatTranspose_MPIAIJ,
1413        MatGetInfo_MPIAIJ,
1414        MatEqual_MPIAIJ,
1415        MatGetDiagonal_MPIAIJ,
1416        MatDiagonalScale_MPIAIJ,
1417        MatNorm_MPIAIJ,
1418        MatAssemblyBegin_MPIAIJ,
1419        MatAssemblyEnd_MPIAIJ,
1420        0,
1421        MatSetOption_MPIAIJ,
1422        MatZeroEntries_MPIAIJ,
1423        MatZeroRows_MPIAIJ,
1424 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1425        MatLUFactorSymbolic_MPIAIJ_TFS,
1426 #else
1427        0,
1428 #endif
1429        0,
1430        0,
1431        0,
1432        MatSetUpPreallocation_MPIAIJ,
1433        0,
1434        0,
1435        0,
1436        0,
1437        MatDuplicate_MPIAIJ,
1438        0,
1439        0,
1440        0,
1441        0,
1442        MatAXPY_MPIAIJ,
1443        MatGetSubMatrices_MPIAIJ,
1444        MatIncreaseOverlap_MPIAIJ,
1445        MatGetValues_MPIAIJ,
1446        MatCopy_MPIAIJ,
1447        MatPrintHelp_MPIAIJ,
1448        MatScale_MPIAIJ,
1449        0,
1450        0,
1451        0,
1452        MatGetBlockSize_MPIAIJ,
1453        0,
1454        0,
1455        0,
1456        0,
1457        MatFDColoringCreate_MPIAIJ,
1458        0,
1459        MatSetUnfactored_MPIAIJ,
1460        0,
1461        0,
1462        MatGetSubMatrix_MPIAIJ,
1463        MatDestroy_MPIAIJ,
1464        MatView_MPIAIJ,
1465        MatGetPetscMaps_Petsc,
1466        0,
1467        0,
1468        0,
1469        0,
1470        0,
1471        0,
1472        0,
1473        0,
1474        MatSetColoring_MPIAIJ,
1475        MatSetValuesAdic_MPIAIJ,
1476        MatSetValuesAdifor_MPIAIJ
1477 };
1478 
1479 /* ----------------------------------------------------------------------------------------*/
1480 
1481 EXTERN_C_BEGIN
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1484 int MatStoreValues_MPIAIJ(Mat mat)
1485 {
1486   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1487   int        ierr;
1488 
1489   PetscFunctionBegin;
1490   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1491   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 EXTERN_C_END
1495 
1496 EXTERN_C_BEGIN
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1499 int MatRetrieveValues_MPIAIJ(Mat mat)
1500 {
1501   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1502   int        ierr;
1503 
1504   PetscFunctionBegin;
1505   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1506   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 EXTERN_C_END
1510 
1511 #include "petscpc.h"
1512 EXTERN_C_BEGIN
1513 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1514 EXTERN_C_END
1515 
1516 EXTERN_C_BEGIN
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatCreate_MPIAIJ"
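/*
    MatCreate_MPIAIJ - constructor registered for the MATMPIAIJ type: allocates the
  Mat_MPIAIJ data structure, installs the function table, determines the row and
  column ownership ranges across the communicator, creates the stash that holds
  entries destined for other processes, and composes the MatStoreValues/
  MatRetrieveValues/MatGetDiagonalBlock methods on the matrix object.
*/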
1519 int MatCreate_MPIAIJ(Mat B)
1520 {
1521   Mat_MPIAIJ *b;
1522   int        ierr,i,size;
1523 #if defined(PETSC_HAVE_SUPERLUDIST)
1524   PetscTruth flg;
1525 #endif
1526 
1527 
1528   PetscFunctionBegin;
1529   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1530 
1531   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1532   B->data         = (void*)b;
1533   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1534   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1535   B->factor       = 0;
1536   B->assembled    = PETSC_FALSE;
1537   B->mapping      = 0;
1538 
1539   B->insertmode      = NOT_SET_VALUES;
1540   b->size            = size;
1541   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1542 
1543   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1544   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1545 
1546   /* the information in the maps duplicates the ownership information computed below;
1547      eventually this duplicate information should be removed and only the maps used */
1548   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1549   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1550 
1551   /* build local table of row and column ownerships */
1552   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1553   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1554   b->cowners = b->rowners + b->size + 2;
1555   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1556   b->rowners[0] = 0;
1557   for (i=2; i<=b->size; i++) {
1558     b->rowners[i] += b->rowners[i-1];
1559   }
1560   b->rstart = b->rowners[b->rank];
1561   b->rend   = b->rowners[b->rank+1];
1562   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1563   b->cowners[0] = 0;
1564   for (i=2; i<=b->size; i++) {
1565     b->cowners[i] += b->cowners[i-1];
1566   }
1567   b->cstart = b->cowners[b->rank];
1568   b->cend   = b->cowners[b->rank+1];
1569 
1570   /* build cache (stash) for off-processor entries formed during assembly */
1571   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1572   b->donotstash  = PETSC_FALSE;
1573   b->colmap      = 0;
1574   b->garray      = 0;
1575   b->roworiented = PETSC_TRUE;
1576 
1577   /* stuff used for matrix vector multiply */
1578   b->lvec      = PETSC_NULL;
1579   b->Mvctx     = PETSC_NULL;
1580 
1581   /* stuff for MatGetRow() */
1582   b->rowindices   = 0;
1583   b->rowvalues    = 0;
1584   b->getrowactive = PETSC_FALSE;
1585 
1586 #if defined(PETSC_HAVE_SUPERLUDIST)
1587   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1588   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1589 #endif
1590 
1591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1592                                      "MatStoreValues_MPIAIJ",
1593                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1595                                      "MatRetrieveValues_MPIAIJ",
1596                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1598                                      "MatGetDiagonalBlock_MPIAIJ",
1599                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1600 
1601   PetscFunctionReturn(0);
1602 }
1603 EXTERN_C_END
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1607 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1608 {
1609   Mat        mat;
1610   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1611   int        ierr;
1612 
1613   PetscFunctionBegin;
1614   *newmat       = 0;
1615   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1616   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1617   a    = (Mat_MPIAIJ*)mat->data;
1618   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1619   mat->factor       = matin->factor;
1620   mat->assembled    = PETSC_TRUE;
1621   mat->insertmode   = NOT_SET_VALUES;
1622   mat->preallocated = PETSC_TRUE;
1623 
1624   a->rstart       = oldmat->rstart;
1625   a->rend         = oldmat->rend;
1626   a->cstart       = oldmat->cstart;
1627   a->cend         = oldmat->cend;
1628   a->size         = oldmat->size;
1629   a->rank         = oldmat->rank;
1630   a->donotstash   = oldmat->donotstash;
1631   a->roworiented  = oldmat->roworiented;
1632   a->rowindices   = 0;
1633   a->rowvalues    = 0;
1634   a->getrowactive = PETSC_FALSE;
1635 
1636   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1637   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1638   if (oldmat->colmap) {
1639 #if defined (PETSC_USE_CTABLE)
1640     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1641 #else
1642     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1643     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1644     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1645 #endif
1646   } else a->colmap = 0;
1647   if (oldmat->garray) {
1648     int len;
1649     len  = oldmat->B->n;
1650     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1651     PetscLogObjectMemory(mat,len*sizeof(int));
1652     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1653   } else a->garray = 0;
1654 
1655   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1656   PetscLogObjectParent(mat,a->lvec);
1657   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1658   PetscLogObjectParent(mat,a->Mvctx);
1659   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1660   PetscLogObjectParent(mat,a->A);
1661   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1662   PetscLogObjectParent(mat,a->B);
1663   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1664   *newmat = mat;
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #include "petscsys.h"
1669 
1670 EXTERN_C_BEGIN
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatLoad_MPIAIJ"
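/*
    MatLoad_MPIAIJ - loads a matrix stored in PETSc binary format into a parallel
  AIJ matrix.  Process 0 reads the header, the row lengths, the column indices,
  and the numerical values from the binary viewer and ships each process its
  portion; every process then inserts its own rows with MatSetValues() and the
  matrix is assembled.
*/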
1673 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1674 {
1675   Mat          A;
1676   PetscScalar  *vals,*svals;
1677   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1678   MPI_Status   status;
1679   int          i,nz,ierr,j,rstart,rend,fd;
1680   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1681   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1682   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1683 
1684   PetscFunctionBegin;
1685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1686   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1687   if (!rank) {
1688     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1689     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1690     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1691     if (header[3] < 0) {
1692       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1693     }
1694   }
1695 
1696   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1697   M = header[1]; N = header[2];
1698   /* determine ownership of all rows */
1699   m = M/size + ((M % size) > rank);
1700   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1701   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1702   rowners[0] = 0;
1703   for (i=2; i<=size; i++) {
1704     rowners[i] += rowners[i-1];
1705   }
1706   rstart = rowners[rank];
1707   rend   = rowners[rank+1];
1708 
1709   /* distribute row lengths to all processors */
1710   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1711   offlens = ourlens + (rend-rstart);
1712   if (!rank) {
1713     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1714     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1715     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1716     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1717     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1718     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1719   } else {
1720     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1721   }
1722 
1723   if (!rank) {
1724     /* calculate the number of nonzeros on each processor */
1725     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1726     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1727     for (i=0; i<size; i++) {
1728       for (j=rowners[i]; j< rowners[i+1]; j++) {
1729         procsnz[i] += rowlengths[j];
1730       }
1731     }
1732     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1733 
1734     /* determine max buffer needed and allocate it */
1735     maxnz = 0;
1736     for (i=0; i<size; i++) {
1737       maxnz = PetscMax(maxnz,procsnz[i]);
1738     }
1739     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1740 
1741     /* read in my part of the matrix column indices  */
1742     nz   = procsnz[0];
1743     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1744     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1745 
1746     /* read in everyone else's column indices and ship them off */
1747     for (i=1; i<size; i++) {
1748       nz   = procsnz[i];
1749       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1750       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1751     }
1752     ierr = PetscFree(cols);CHKERRQ(ierr);
1753   } else {
1754     /* determine buffer space needed for message */
1755     nz = 0;
1756     for (i=0; i<m; i++) {
1757       nz += ourlens[i];
1758     }
1759     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1760 
1761     /* receive message of column indices */
1762     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1763     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1764     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1765   }
1766 
1767   /* determine column ownership if matrix is not square */
1768   if (N != M) {
1769     n      = N/size + ((N % size) > rank);
1770     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1771     cstart = cend - n;
1772   } else {
1773     cstart = rstart;
1774     cend   = rend;
1775     n      = cend - cstart;
1776   }
1777 
1778   /* loop over local rows, determining number of off-diagonal entries */
1779   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1780   jj = 0;
1781   for (i=0; i<m; i++) {
1782     for (j=0; j<ourlens[i]; j++) {
1783       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1784       jj++;
1785     }
1786   }
1787 
1788   /* create our matrix */
1789   for (i=0; i<m; i++) {
1790     ourlens[i] -= offlens[i];
1791   }
1792   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1793   A = *newmat;
1794   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1795   for (i=0; i<m; i++) {
1796     ourlens[i] += offlens[i];
1797   }
1798 
1799   if (!rank) {
1800     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1801 
1802     /* read in my part of the matrix numerical values  */
1803     nz   = procsnz[0];
1804     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1805 
1806     /* insert into matrix */
1807     jj      = rstart;
1808     smycols = mycols;
1809     svals   = vals;
1810     for (i=0; i<m; i++) {
1811       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1812       smycols += ourlens[i];
1813       svals   += ourlens[i];
1814       jj++;
1815     }
1816 
1817     /* read in the other processors' values and ship them out */
1818     for (i=1; i<size; i++) {
1819       nz   = procsnz[i];
1820       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1821       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1822     }
1823     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1824   } else {
1825     /* receive numeric values */
1826     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1827 
1828     /* receive message of values */
1829     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1830     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1831     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1832 
1833     /* insert into matrix */
1834     jj      = rstart;
1835     smycols = mycols;
1836     svals   = vals;
1837     for (i=0; i<m; i++) {
1838       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1839       smycols += ourlens[i];
1840       svals   += ourlens[i];
1841       jj++;
1842     }
1843   }
1844   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1845   ierr = PetscFree(vals);CHKERRQ(ierr);
1846   ierr = PetscFree(mycols);CHKERRQ(ierr);
1847   ierr = PetscFree(rowners);CHKERRQ(ierr);
1848 
1849   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1850   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 EXTERN_C_END
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1857 /*
1858     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
1859   copy on each process, and then the final parallel matrix formed by concatenating
1860   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1861 */
1862 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1863 {
1864   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1865   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1866   Mat          *local,M,Mreuse;
1867   PetscScalar  *vwork,*aa;
1868   MPI_Comm     comm = mat->comm;
1869   Mat_SeqAIJ   *aij;
1870 
1871 
1872   PetscFunctionBegin;
1873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875 
1876   if (call ==  MAT_REUSE_MATRIX) {
1877     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1878     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1879     local = &Mreuse;
1880     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1881   } else {
1882     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1883     Mreuse = *local;
1884     ierr   = PetscFree(local);CHKERRQ(ierr);
1885   }
1886 
1887   /*
1888       m - number of local rows
1889       n - number of columns (same on all processors)
1890       rstart - first row in new global matrix generated
1891   */
1892   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1893   if (call == MAT_INITIAL_MATRIX) {
1894     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1895     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1896     ii  = aij->i;
1897     jj  = aij->j;
1898 
1899     /*
1900         Determine the number of non-zeros in the diagonal and off-diagonal
1901         portions of the matrix in order to do correct preallocation
1902     */
1903 
1904     /* first get start and end of "diagonal" columns */
1905     if (csize == PETSC_DECIDE) {
1906       nlocal = n/size + ((n % size) > rank);
1907     } else {
1908       nlocal = csize;
1909     }
1910     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1911     rstart = rend - nlocal;
1912     if (rank == size - 1 && rend != n) {
1913       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1914     }
1915 
1916     /* next, compute all the lengths */
1917     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1918     olens = dlens + m;
1919     for (i=0; i<m; i++) {
1920       jend = ii[i+1] - ii[i];
1921       olen = 0;
1922       dlen = 0;
1923       for (j=0; j<jend; j++) {
1924         if (*jj < rstart || *jj >= rend) olen++;
1925         else dlen++;
1926         jj++;
1927       }
1928       olens[i] = olen;
1929       dlens[i] = dlen;
1930     }
1931     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1932     ierr = PetscFree(dlens);CHKERRQ(ierr);
1933   } else {
1934     int ml,nl;
1935 
1936     M = *newmat;
1937     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1938     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1939     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1940     /*
1941          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1942        rather than the slower MatSetValues().
1943     */
1944     M->was_assembled = PETSC_TRUE;
1945     M->assembled     = PETSC_FALSE;
1946   }
1947   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1948   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1949   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1950   ii  = aij->i;
1951   jj  = aij->j;
1952   aa  = aij->a;
1953   for (i=0; i<m; i++) {
1954     row   = rstart + i;
1955     nz    = ii[i+1] - ii[i];
1956     cwork = jj;     jj += nz;
1957     vwork = aa;     aa += nz;
1958     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1959   }
1960 
1961   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   *newmat = M;
1964 
1965   /* save the submatrix used on this processor for a later reuse request */
1966   if (call ==  MAT_INITIAL_MATRIX) {
1967     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1968     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1969   }
1970 
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1976 /*@C
1977    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
1978    (the default parallel PETSc format).  For good matrix assembly performance
1979    the user should preallocate the matrix storage by setting the parameters
1980    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1981    performance can be increased by more than a factor of 50.
1982 
1983    Collective on MPI_Comm
1984 
1985    Input Parameters:
1986 +  B - the matrix
1987 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1988            (same value is used for all local rows)
1989 .  d_nnz - array containing the number of nonzeros in the various rows of the
1990            DIAGONAL portion of the local submatrix (possibly different for each row)
1991            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1992            The size of this array is equal to the number of local rows, i.e. 'm'.
1993            You must leave room for the diagonal entry even if it is zero.
1994 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1995            submatrix (same value is used for all local rows).
1996 -  o_nnz - array containing the number of nonzeros in the various rows of the
1997            OFF-DIAGONAL portion of the local submatrix (possibly different for
1998            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1999            structure. The size of this array is equal to the number
2000            of local rows, i.e. 'm'.
2001 
2002    The AIJ format (also called the Yale sparse matrix format or
2003    compressed row storage) is fully compatible with standard Fortran 77
2004    storage.  That is, the stored row and column indices can begin at
2005    either one (as in Fortran) or zero.  See the users manual for details.
2006 
2007    The user MUST specify either the local or global matrix dimensions
2008    (possibly both).
2009 
2010    The parallel matrix is partitioned such that the first m0 rows belong to
2011    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2012    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2013 
2014    The DIAGONAL portion of the local submatrix of a processor can be defined
2015    as the submatrix which is obtained by extracting the part corresponding
2016    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2017    first row that belongs to the processor, and r2 is the last row belonging
2018    to this processor. This is a square mxm matrix. The remaining portion
2019    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2020 
2021    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2022 
2023    By default, this format uses inodes (identical nodes) when possible.
2024    We search for consecutive rows with the same nonzero structure, thereby
2025    reusing matrix information to achieve increased efficiency.
2026 
2027    Options Database Keys:
2028 +  -mat_aij_no_inode  - Do not use inodes
2029 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2030 -  -mat_aij_oneindex - Internally use indexing starting at 1
2031         rather than 0.  Note that when calling MatSetValues(),
2032         the user still MUST index entries starting at 0!
2033 
2034    Example usage:
2035 
2036    Consider the following 8x8 matrix with 34 non-zero values, that is
2037    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2038    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2039    as follows:
2040 
2041 .vb
2042             1  2  0  |  0  3  0  |  0  4
2043     Proc0   0  5  6  |  7  0  0  |  8  0
2044             9  0 10  | 11  0  0  | 12  0
2045     -------------------------------------
2046            13  0 14  | 15 16 17  |  0  0
2047     Proc1   0 18  0  | 19 20 21  |  0  0
2048             0  0  0  | 22 23  0  | 24  0
2049     -------------------------------------
2050     Proc2  25 26 27  |  0  0 28  | 29  0
2051            30  0  0  | 31 32 33  |  0 34
2052 .ve
2053 
2054    This can be represented as a collection of submatrices as:
2055 
2056 .vb
2057       A B C
2058       D E F
2059       G H I
2060 .ve
2061 
2062    Where the submatrices A,B,C are owned by proc0, D,E,F are
2063    owned by proc1, G,H,I are owned by proc2.
2064 
2065    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2066    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2067    The 'M','N' parameters are 8,8, and have the same values on all procs.
2068 
2069    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2070    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2071    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2072    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2073    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2074    matrix, and [DF] as another SeqAIJ matrix.
2075 
2076    When d_nz, o_nz parameters are specified, d_nz storage elements are
2077    allocated for every row of the local diagonal submatrix, and o_nz
2078    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2079    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2080    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2081    In this case, the values of d_nz,o_nz are:
2082 .vb
2083      proc0 : d_nz = 2, o_nz = 2
2084      proc1 : d_nz = 3, o_nz = 2
2085      proc2 : d_nz = 1, o_nz = 4
2086 .ve
2087    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2088    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2089    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2090    34 values.
2091 
2092    When d_nnz, o_nnz parameters are specified, the storage is specified
2093    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2094    In the above case the values for d_nnz,o_nnz are:
2095 .vb
2096      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2097      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2098      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2099 .ve
2100    Here the space allocated is the sum of all the above values, i.e. 34, and
2101    hence the preallocation is perfect.
2102 
2103    Level: intermediate
2104 
2105 .keywords: matrix, aij, compressed row, sparse, parallel
2106 
2107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2108 @*/
2109 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2110 {
2111   Mat_MPIAIJ   *b;
2112   int          ierr,i;
2113   PetscTruth   flg2;
2114 
2115   PetscFunctionBegin;
2116   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2117   if (!flg2) PetscFunctionReturn(0);
2118   B->preallocated = PETSC_TRUE;
2119   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2120   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2121   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2122   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2123   if (d_nnz) {
2124     for (i=0; i<B->m; i++) {
2125       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2126     }
2127   }
2128   if (o_nnz) {
2129     for (i=0; i<B->m; i++) {
2130       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2131     }
2132   }
2133   b = (Mat_MPIAIJ*)B->data;
2134 
2135   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2136   PetscLogObjectParent(B,b->A);
2137   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2138   PetscLogObjectParent(B,b->B);
2139 
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatCreateMPIAIJ"
2145 /*@C
2146    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2147    (the default parallel PETSc format).  For good matrix assembly performance
2148    the user should preallocate the matrix storage by setting the parameters
2149    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2150    performance can be increased by more than a factor of 50.
2151 
2152    Collective on MPI_Comm
2153 
2154    Input Parameters:
2155 +  comm - MPI communicator
2156 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2157            This value should be the same as the local size used in creating the
2158            y vector for the matrix-vector product y = Ax.
2159 .  n - This value should be the same as the local size used in creating the
2160        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2161        it calculated if N is given). For square matrices n is almost always m.
2162 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2163 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2164 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2165            (same value is used for all local rows)
2166 .  d_nnz - array containing the number of nonzeros in the various rows of the
2167            DIAGONAL portion of the local submatrix (possibly different for each row)
2168            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2169            The size of this array is equal to the number of local rows, i.e. 'm'.
2170            You must leave room for the diagonal entry even if it is zero.
2171 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2172            submatrix (same value is used for all local rows).
2173 -  o_nnz - array containing the number of nonzeros in the various rows of the
2174            OFF-DIAGONAL portion of the local submatrix (possibly different for
2175            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2176            structure. The size of this array is equal to the number
2177            of local rows, i.e. 'm'.
2178 
2179    Output Parameter:
2180 .  A - the matrix
2181 
2182    Notes:
2183    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2184    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2185    storage requirements for this matrix.
2186 
2187    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2188    processor then it must be used on all processors that share the object for
2189    that argument.
2190 
2191    The AIJ format (also called the Yale sparse matrix format or
2192    compressed row storage) is fully compatible with standard Fortran 77
2193    storage.  That is, the stored row and column indices can begin at
2194    either one (as in Fortran) or zero.  See the users manual for details.
2195 
2196    The user MUST specify either the local or global matrix dimensions
2197    (possibly both).
2198 
2199    The parallel matrix is partitioned such that the first m0 rows belong to
2200    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2201    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2202 
2203    The DIAGONAL portion of the local submatrix of a processor can be defined
2204    as the submatrix which is obtained by extracting the part corresponding
2205    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2206    first row that belongs to the processor, and r2 is the last row belonging
2207    to this processor. This is a square mxm matrix. The remaining portion
2208    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2209 
2210    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2211 
2212    By default, this format uses inodes (identical nodes) when possible.
2213    We search for consecutive rows with the same nonzero structure, thereby
2214    reusing matrix information to achieve increased efficiency.
2215 
2216    Options Database Keys:
2217 +  -mat_aij_no_inode  - Do not use inodes
2218 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2219 -  -mat_aij_oneindex - Internally use indexing starting at 1
2220         rather than 0.  Note that when calling MatSetValues(),
2221         the user still MUST index entries starting at 0!
2222 
2223 
2224    Example usage:
2225 
2226    Consider the following 8x8 matrix with 34 non-zero values, that is
2227    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2228    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2229    as follows:
2230 
2231 .vb
2232             1  2  0  |  0  3  0  |  0  4
2233     Proc0   0  5  6  |  7  0  0  |  8  0
2234             9  0 10  | 11  0  0  | 12  0
2235     -------------------------------------
2236            13  0 14  | 15 16 17  |  0  0
2237     Proc1   0 18  0  | 19 20 21  |  0  0
2238             0  0  0  | 22 23  0  | 24  0
2239     -------------------------------------
2240     Proc2  25 26 27  |  0  0 28  | 29  0
2241            30  0  0  | 31 32 33  |  0 34
2242 .ve
2243 
2244    This can be represented as a collection of submatrices as:
2245 
2246 .vb
2247       A B C
2248       D E F
2249       G H I
2250 .ve
2251 
2252    Where the submatrices A,B,C are owned by proc0, D,E,F are
2253    owned by proc1, G,H,I are owned by proc2.
2254 
2255    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2256    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2257    The 'M','N' parameters are 8,8, and have the same values on all procs.
2258 
2259    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2260    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2261    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2262    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2263    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2264    matrix, and [DF] as another SeqAIJ matrix.
2265 
2266    When d_nz, o_nz parameters are specified, d_nz storage elements are
2267    allocated for every row of the local diagonal submatrix, and o_nz
2268    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2269    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2270    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2271    In this case, the values of d_nz,o_nz are:
2272 .vb
2273      proc0 : d_nz = 2, o_nz = 2
2274      proc1 : d_nz = 3, o_nz = 2
2275      proc2 : d_nz = 1, o_nz = 4
2276 .ve
2277    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2278    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2279    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2280    34 values.
2281 
2282    When d_nnz, o_nnz parameters are specified, the storage is specified
2283    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2284    In the above case the values for d_nnz,o_nnz are:
2285 .vb
2286      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2287      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2288      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2289 .ve
2290    Here the space allocated is the sum of all the above values, i.e. 34, and
2291    hence the preallocation is perfect.
2292 
2293    Level: intermediate
2294 
2295 .keywords: matrix, aij, compressed row, sparse, parallel
2296 
2297 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2298 @*/
2299 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2300 {
2301   int ierr,size;
2302 
2303   PetscFunctionBegin;
2304   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2305   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2306   if (size > 1) {
2307     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2308     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2309   } else {
2310     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2311     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2312   }
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
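/*
    MatMPIAIJGetSeqAIJ - gives access to the two sequential pieces of a parallel
  AIJ matrix: Ad is the local "diagonal" block a->A, Ao is the local off-diagonal
  block a->B (stored with compressed column numbering), and colmap is a->garray,
  which maps the compressed local columns of Ao to global column numbers.
*/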
2318 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2319 {
2320   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2321   PetscFunctionBegin;
2322   *Ad     = a->A;
2323   *Ao     = a->B;
2324   *colmap = a->garray;
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatSetColoring_MPIAIJ"
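/*
    MatSetColoring_MPIAIJ - passes the coloring on to the two sequential pieces
  a->A and a->B.  For IS_COLORING_LOCAL the colors are gathered from all processes
  and the ghost-column colors are looked up through a->garray; for
  IS_COLORING_GHOSTED the global column numbers are first mapped back to the
  ghosted local numbering via A->mapping before the lookup.
*/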
2330 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2331 {
2332   int        ierr;
2333   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2334 
2335   PetscFunctionBegin;
2336   if (coloring->ctype == IS_COLORING_LOCAL) {
2337     int        *allcolors,*colors,i;
2338     ISColoring ocoloring;
2339 
2340     /* set coloring for diagonal portion */
2341     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2342 
2343     /* set coloring for off-diagonal portion */
2344     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2345     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2346     for (i=0; i<a->B->n; i++) {
2347       colors[i] = allcolors[a->garray[i]];
2348     }
2349     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2350     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2351     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2352     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2353   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2354     int        *colors,i,*larray;
2355     ISColoring ocoloring;
2356 
2357     /* set coloring for diagonal portion */
2358     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2359     for (i=0; i<a->A->n; i++) {
2360       larray[i] = i + a->cstart;
2361     }
2362     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2363     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2364     for (i=0; i<a->A->n; i++) {
2365       colors[i] = coloring->colors[larray[i]];
2366     }
2367     ierr = PetscFree(larray);CHKERRQ(ierr);
2368     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2369     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2370     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2371 
2372     /* set coloring for off-diagonal portion */
2373     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2374     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2375     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2376     for (i=0; i<a->B->n; i++) {
2377       colors[i] = coloring->colors[larray[i]];
2378     }
2379     ierr = PetscFree(larray);CHKERRQ(ierr);
2380     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2381     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2382     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2383   } else {
2384     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2385   }
2386 
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2392 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2393 {
2394   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2395   int        ierr;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2399   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2405 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2406 {
2407   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2408   int        ierr;
2409 
2410   PetscFunctionBegin;
2411   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2412   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415