xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c4888f26f790dc9e94f91dfe95482b098d46e974)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
19 a slightly higher hash-table cost; without it, it is not scalable (each processor
20 has an order-N integer array) but access is fast.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
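#if 0
/*
   Illustrative sketch (not compiled): how the colmap created above is typically
   consulted.  It mirrors the lookups done in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; the map stores "local index + 1" so that 0 can
   mean "column not present in the off-diagonal block".
*/
static int ColmapLookupSketch(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* table holds local index + 1              */
#else
  *lcol = aij->colmap[gcol] - 1;  /* array holds local index + 1, 0 if absent */
#endif
  /* *lcol < 0 now means the global column gcol is not stored locally in B */
  return 0;
}
#endif
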
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
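#if 0
/*
   A stripped-down, single-row version of what the two macros above do
   (illustrative only; the names and the separate mallocs are simplified from
   the real code): keep the column indices of one row sorted, overwrite or add
   on a match, and when the row is full grow its storage by CHUNKSIZE rather
   than one entry at a time.  cols and vals are assumed to come from PetscMalloc().
*/
static int InsertIntoSortedRowSketch(int **cols_p,PetscScalar **vals_p,int *nrow,int *rmax,
                                     int col,PetscScalar value,InsertMode addv)
{
  int         *cols = *cols_p,i,k,ierr;
  PetscScalar *vals = *vals_p;

  for (i=0; i<*nrow; i++) {
    if (cols[i] > col) break;
    if (cols[i] == col) {                        /* entry already present         */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return 0;
    }
  }
  if (*nrow >= *rmax) {                          /* no room left: grow by a chunk */
    int         *ncols;
    PetscScalar *nvals;
    ierr = PetscMalloc((*rmax+CHUNKSIZE)*sizeof(int),&ncols);CHKERRQ(ierr);
    ierr = PetscMalloc((*rmax+CHUNKSIZE)*sizeof(PetscScalar),&nvals);CHKERRQ(ierr);
    ierr = PetscMemcpy(ncols,cols,(*nrow)*sizeof(int));CHKERRQ(ierr);
    ierr = PetscMemcpy(nvals,vals,(*nrow)*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = PetscFree(cols);CHKERRQ(ierr);
    ierr = PetscFree(vals);CHKERRQ(ierr);
    *cols_p = cols = ncols; *vals_p = vals = nvals; *rmax += CHUNKSIZE;
  }
  for (k=*nrow; k>i; k--) { cols[k] = cols[k-1]; vals[k] = vals[k-1]; } /* shift later entries up */
  cols[i] = col; vals[i] = value; (*nrow)++;
  return 0;
}
#endif
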
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
278 
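#if 0
/*
   Sketch of the two value layouts handled above through aij->roworiented
   (the indices and values here are made up): for the same logical 2x2 block,
   a row-oriented caller supplies the entries row by row (value = v[i*n+j])
   while a column-oriented caller, e.g. Fortran, supplies them column by
   column (value = v[i+j*m]).
*/
static int ValueLayoutSketch(Mat A)
{
  int         ierr,im[2] = {0,1},in[2] = {0,1};
  PetscScalar vrow[4] = {1.0,2.0,    /* row 0: a00 a01 */
                         3.0,4.0};   /* row 1: a10 a11 */
  PetscScalar vcol[4] = {1.0,3.0,    /* col 0: a00 a10 */
                         2.0,4.0};   /* col 1: a01 a11 */

  ierr = MatSetOption(A,MAT_ROW_ORIENTED);CHKERRQ(ierr);
  ierr = MatSetValues(A,2,im,2,in,vrow,INSERT_VALUES);CHKERRQ(ierr);
  /* the same block expressed column by column */
  ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
  ierr = MatSetValues(A,2,im,2,in,vcol,INSERT_VALUES);CHKERRQ(ierr);
  return 0;
}
#endif
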
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col --;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are either in INSERTMODE or ADDMODE */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so, we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
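#if 0
/*
   Minimal usage sketch of the set-values/assembly path implemented above
   (sizes and values are made up; assumes a communicator with any number of
   processes).  Entries for rows owned by other processes are kept in the
   stash by MatSetValues_MPIAIJ() and shipped to their owners by
   MatAssemblyBegin/End_MPIAIJ().
*/
static int AssemblySketch(MPI_Comm comm)
{
  Mat         A;
  int         ierr,rank,size,i,row,M;
  PetscScalar v = 1.0;

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  M    = 4*size;                                  /* four rows per process */
  ierr = MatCreateMPIAIJ(comm,4,4,M,M,1,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
  for (i=0; i<4; i++) {
    row  = 4*rank + i;                            /* rows owned by this process */
    ierr = MatSetValues(A,1,&row,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  return 0;
}
#endif
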
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the data into the send scatter */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal, we use separate
540      code so that MatSetValues() is not called for each diagonal entry;
541      that would allocate new memory, causing many mallocs and slowing things down.
542 
543        Contributed by: Mathew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
578 
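#if 0
/*
   The counting step in MatZeroRows_MPIAIJ() above learns, through a single
   MPI_Allreduce with the custom PetscMaxSum_Op, how many messages each process
   will receive and how long the longest one is.  A plainer formulation of the
   same bookkeeping using MPI_Alltoall is sketched here; nprocs[i] is, as above,
   the number of rows destined for process i.
*/
static int CountIncomingSketch(MPI_Comm comm,int size,int *nprocs,int *nrecvs,int *nmax)
{
  int ierr,i,*counts;

  ierr = PetscMalloc((size+1)*sizeof(int),&counts);CHKERRQ(ierr);
  /* counts[i] = number of rows process i will send to this process */
  ierr = MPI_Alltoall(nprocs,1,MPI_INT,counts,1,MPI_INT,comm);CHKERRQ(ierr);
  *nrecvs = 0; *nmax = 0;
  for (i=0; i<size; i++) {
    if (counts[i])         (*nrecvs)++;
    if (counts[i] > *nmax) *nmax = counts[i];
  }
  ierr = PetscFree(counts);CHKERRQ(ierr);
  return 0;
}
#endif
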
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
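#if 0
/*
   Sketch of the vector layout MatMult_MPIAIJ() above expects (assumes an
   assembled matrix; the input values are made up): xx must be distributed like
   the columns (local length A->n) and yy like the rows (local length A->m).
   The product is formed as yy = A_diag*xx_local + B_off*xx_ghost, with the
   ghost values gathered into a->lvec by the scatter.
*/
static int MultSketch(Mat A)
{
  Vec         x,y;
  int         ierr,m,n;
  PetscScalar one = 1.0;

  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,n,PETSC_DECIDE,&x);CHKERRQ(ierr); /* column layout */
  ierr = VecCreateMPI(A->comm,m,PETSC_DECIDE,&y);CHKERRQ(ierr); /* row layout    */
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  return 0;
}
#endif
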
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* inserted into yy until the VecScatterEnd() below, which is true for */
629   /* this implementation but is perhaps not always true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually */
649   /* inserted into zz until the VecScatterEnd() below, which is true for */
650   /* this implementation but is perhaps not always true. */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the subblock A->A is the
657    diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
722   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
723   PetscTruth        isdraw,isascii,flg;
724   PetscViewer       sviewer;
725   PetscViewerFormat format;
726 
727   PetscFunctionBegin;
728   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
730   if (isascii) {
731     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
732     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
733       MatInfo info;
734       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
735       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
737       if (flg) {
738         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
739 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
740       } else {
741         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
742 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
743       }
744       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
749       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
750       PetscFunctionReturn(0);
751     } else if (format == PETSC_VIEWER_ASCII_INFO) {
752       PetscFunctionReturn(0);
753     }
754   } else if (isdraw) {
755     PetscDraw       draw;
756     PetscTruth isnull;
757     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
758     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
759   }
760 
761   if (size == 1) {
762     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
763     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
764   } else {
765     /* assemble the entire matrix onto first processor. */
766     Mat         A;
767     Mat_SeqAIJ *Aloc;
768     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
769     PetscScalar *a;
770 
771     if (!rank) {
772       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     } else {
774       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
775     }
776     PetscLogObjectParent(mat,A);
777 
778     /* copy over the A part */
779     Aloc = (Mat_SeqAIJ*)aij->A->data;
780     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781     row = aij->rstart;
782     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
783     for (i=0; i<m; i++) {
784       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
785       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
786     }
787     aj = Aloc->j;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
789 
790     /* copy over the B part */
791     Aloc = (Mat_SeqAIJ*)aij->B->data;
792     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
793     row  = aij->rstart;
794     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
795     ct   = cols;
796     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
797     for (i=0; i<m; i++) {
798       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800     }
801     ierr = PetscFree(ct);CHKERRQ(ierr);
802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     /*
805        Everyone has to participate in this call to draw the matrix, since the graphics
806        waits are synchronized across all processors that share the PetscDraw object
807     */
808     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
809     if (!rank) {
810       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
811       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
812     }
813     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     ierr = MatDestroy(A);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatView_MPIAIJ"
821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
822 {
823   int        ierr;
824   PetscTruth isascii,isdraw,issocket,isbinary;
825 
826   PetscFunctionBegin;
827   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
828   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
829   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
830   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
831   if (isascii || isdraw || isbinary || issocket) {
832     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
833   } else {
834     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatRelax_MPIAIJ"
843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
844 {
845   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
846   int          ierr;
847   Vec          bb1;
848   PetscScalar  mone=-1.0;
849 
850   PetscFunctionBegin;
851   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
852 
853   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
854 
855   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
856     if (flag & SOR_ZERO_INITIAL_GUESS) {
857       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
858       its--;
859     }
860 
861     while (its--) {
862       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
863       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
864 
865       /* update rhs: bb1 = bb - B*x */
866       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
867       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
868 
869       /* local sweep */
870       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
871       CHKERRQ(ierr);
872     }
873   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
876       its--;
877     }
878     while (its--) {
879       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881 
882       /* update rhs: bb1 = bb - B*x */
883       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
884       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
885 
886       /* local sweep */
887       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888       CHKERRQ(ierr);
889     }
890   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
893       its--;
894     }
895     while (its--) {
896       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898 
899       /* update rhs: bb1 = bb - B*x */
900       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
901       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
902 
903       /* local sweep */
904       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
905       CHKERRQ(ierr);
906     }
907   } else {
908     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
909   }
910 
911   ierr = VecDestroy(bb1);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
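#if 0
/*
   What one of the local sweeps above amounts to, written out for a single
   forward SOR/Gauss-Seidel pass over the diagonal block stored in CSR form
   (illustrative only; assumes 0-based indices and that every row has a
   diagonal entry).  The off-processor coupling has already been folded into
   the right-hand side as bb1 = bb - B*lvec by the code above.
*/
static void LocalForwardSweepSketch(int m,const int *ai,const int *aj,const PetscScalar *aa,
                                    const PetscScalar *bb1,PetscReal omega,PetscScalar *x)
{
  int         i,k;
  PetscScalar s,d;

  for (i=0; i<m; i++) {
    s = bb1[i]; d = 1.0;
    for (k=ai[i]; k<ai[i+1]; k++) {
      if (aj[k] == i) d  = aa[k];           /* remember the diagonal entry    */
      else            s -= aa[k]*x[aj[k]];  /* subtract the off-diagonal part */
    }
    x[i] = (1.0-omega)*x[i] + omega*s/d;
  }
}
#endif
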
915 #undef __FUNCT__
916 #define __FUNCT__ "MatGetInfo_MPIAIJ"
917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
918 {
919   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
920   Mat        A = mat->A,B = mat->B;
921   int        ierr;
922   PetscReal  isend[5],irecv[5];
923 
924   PetscFunctionBegin;
925   info->block_size     = 1.0;
926   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
927   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
928   isend[3] = info->memory;  isend[4] = info->mallocs;
929   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
930   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
931   isend[3] += info->memory;  isend[4] += info->mallocs;
932   if (flag == MAT_LOCAL) {
933     info->nz_used      = isend[0];
934     info->nz_allocated = isend[1];
935     info->nz_unneeded  = isend[2];
936     info->memory       = isend[3];
937     info->mallocs      = isend[4];
938   } else if (flag == MAT_GLOBAL_MAX) {
939     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
940     info->nz_used      = irecv[0];
941     info->nz_allocated = irecv[1];
942     info->nz_unneeded  = irecv[2];
943     info->memory       = irecv[3];
944     info->mallocs      = irecv[4];
945   } else if (flag == MAT_GLOBAL_SUM) {
946     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
947     info->nz_used      = irecv[0];
948     info->nz_allocated = irecv[1];
949     info->nz_unneeded  = irecv[2];
950     info->memory       = irecv[3];
951     info->mallocs      = irecv[4];
952   }
953   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
954   info->fill_ratio_needed = 0;
955   info->factor_mallocs    = 0;
956   info->rows_global       = (double)matin->M;
957   info->columns_global    = (double)matin->N;
958   info->rows_local        = (double)matin->m;
959   info->columns_local     = (double)matin->N;
960 
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatSetOption_MPIAIJ"
966 int MatSetOption_MPIAIJ(Mat A,MatOption op)
967 {
968   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
969   int        ierr;
970 
971   PetscFunctionBegin;
972   switch (op) {
973   case MAT_NO_NEW_NONZERO_LOCATIONS:
974   case MAT_YES_NEW_NONZERO_LOCATIONS:
975   case MAT_COLUMNS_UNSORTED:
976   case MAT_COLUMNS_SORTED:
977   case MAT_NEW_NONZERO_ALLOCATION_ERR:
978   case MAT_KEEP_ZEROED_ROWS:
979   case MAT_NEW_NONZERO_LOCATION_ERR:
980   case MAT_USE_INODES:
981   case MAT_DO_NOT_USE_INODES:
982   case MAT_IGNORE_ZERO_ENTRIES:
983     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
984     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
985     break;
986   case MAT_ROW_ORIENTED:
987     a->roworiented = PETSC_TRUE;
988     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
989     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
990     break;
991   case MAT_ROWS_SORTED:
992   case MAT_ROWS_UNSORTED:
993   case MAT_YES_NEW_DIAGONALS:
994   case MAT_USE_SINGLE_PRECISION_SOLVES:
995     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
996     break;
997   case MAT_COLUMN_ORIENTED:
998     a->roworiented = PETSC_FALSE;
999     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1000     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1001     break;
1002   case MAT_IGNORE_OFF_PROC_ENTRIES:
1003     a->donotstash = PETSC_TRUE;
1004     break;
1005   case MAT_NO_NEW_DIAGONALS:
1006     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1007   default:
1008     SETERRQ(PETSC_ERR_SUP,"unknown option");
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatGetRow_MPIAIJ"
1015 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1016 {
1017   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1018   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1019   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1020   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1021   int          *cmap,*idx_p;
1022 
1023   PetscFunctionBegin;
1024   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1025   mat->getrowactive = PETSC_TRUE;
1026 
1027   if (!mat->rowvalues && (idx || v)) {
1028     /*
1029         allocate enough space to hold information from the longest row.
1030     */
1031     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1032     int     max = 1,tmp;
1033     for (i=0; i<matin->m; i++) {
1034       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1035       if (max < tmp) { max = tmp; }
1036     }
1037     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1038     mat->rowindices = (int*)(mat->rowvalues + max);
1039   }
1040 
1041   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1042   lrow = row - rstart;
1043 
1044   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1045   if (!v)   {pvA = 0; pvB = 0;}
1046   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1047   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1048   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1049   nztot = nzA + nzB;
1050 
1051   cmap  = mat->garray;
1052   if (v  || idx) {
1053     if (nztot) {
1054       /* Sort by increasing column numbers, assuming A and B already sorted */
1055       int imark = -1;
1056       if (v) {
1057         *v = v_p = mat->rowvalues;
1058         for (i=0; i<nzB; i++) {
1059           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1060           else break;
1061         }
1062         imark = i;
1063         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1064         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1065       }
1066       if (idx) {
1067         *idx = idx_p = mat->rowindices;
1068         if (imark > -1) {
1069           for (i=0; i<imark; i++) {
1070             idx_p[i] = cmap[cworkB[i]];
1071           }
1072         } else {
1073           for (i=0; i<nzB; i++) {
1074             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1075             else break;
1076           }
1077           imark = i;
1078         }
1079         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1080         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1081       }
1082     } else {
1083       if (idx) *idx = 0;
1084       if (v)   *v   = 0;
1085     }
1086   }
1087   *nz = nztot;
1088   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1089   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1095 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1096 {
1097   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1098 
1099   PetscFunctionBegin;
1100   if (aij->getrowactive == PETSC_FALSE) {
1101     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1102   }
1103   aij->getrowactive = PETSC_FALSE;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatNorm_MPIAIJ"
1109 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1110 {
1111   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1112   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1113   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1114   PetscReal    sum = 0.0;
1115   PetscScalar  *v;
1116 
1117   PetscFunctionBegin;
1118   if (aij->size == 1) {
1119     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1120   } else {
1121     if (type == NORM_FROBENIUS) {
1122       v = amat->a;
1123       for (i=0; i<amat->nz; i++) {
1124 #if defined(PETSC_USE_COMPLEX)
1125         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1126 #else
1127         sum += (*v)*(*v); v++;
1128 #endif
1129       }
1130       v = bmat->a;
1131       for (i=0; i<bmat->nz; i++) {
1132 #if defined(PETSC_USE_COMPLEX)
1133         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1134 #else
1135         sum += (*v)*(*v); v++;
1136 #endif
1137       }
1138       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1139       *norm = sqrt(*norm);
1140     } else if (type == NORM_1) { /* max column norm */
1141       PetscReal *tmp,*tmp2;
1142       int    *jj,*garray = aij->garray;
1143       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1144       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1145       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1146       *norm = 0.0;
1147       v = amat->a; jj = amat->j;
1148       for (j=0; j<amat->nz; j++) {
1149         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1150       }
1151       v = bmat->a; jj = bmat->j;
1152       for (j=0; j<bmat->nz; j++) {
1153         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1154       }
1155       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1156       for (j=0; j<mat->N; j++) {
1157         if (tmp2[j] > *norm) *norm = tmp2[j];
1158       }
1159       ierr = PetscFree(tmp);CHKERRQ(ierr);
1160       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1161     } else if (type == NORM_INFINITY) { /* max row norm */
1162       PetscReal ntemp = 0.0;
1163       for (j=0; j<aij->A->m; j++) {
1164         v = amat->a + amat->i[j] + shift;
1165         sum = 0.0;
1166         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1167           sum += PetscAbsScalar(*v); v++;
1168         }
1169         v = bmat->a + bmat->i[j] + shift;
1170         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1171           sum += PetscAbsScalar(*v); v++;
1172         }
1173         if (sum > ntemp) ntemp = sum;
1174       }
1175       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1176     } else {
1177       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1178     }
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
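#if 0
/*
   The Frobenius branch above, reduced to its essentials (illustrative): each
   process sums |a_ij|^2 over the nonzeros of its diagonal and off-diagonal
   blocks, the partial sums are combined with MPI_SUM, and the square root of
   the global sum is the norm.
*/
static int LocalSquaredSumSketch(int nz,const PetscScalar *a,PetscReal *sum)
{
  int i;

  *sum = 0.0;
  for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
    *sum += PetscRealPart(PetscConj(a[i])*a[i]);
#else
    *sum += a[i]*a[i];
#endif
  }
  return 0;  /* ||A||_F = sqrt(Allreduce(sum_A + sum_B,MPI_SUM)) */
}
#endif
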
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatTranspose_MPIAIJ"
1185 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1186 {
1187   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1188   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1189   int          ierr,shift = Aloc->indexshift;
1190   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1191   Mat          B;
1192   PetscScalar  *array;
1193 
1194   PetscFunctionBegin;
1195   if (!matout && M != N) {
1196     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1197   }
1198 
1199   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1200 
1201   /* copy over the A part */
1202   Aloc = (Mat_SeqAIJ*)a->A->data;
1203   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1204   row = a->rstart;
1205   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1206   for (i=0; i<m; i++) {
1207     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1208     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1209   }
1210   aj = Aloc->j;
1211   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1212 
1213   /* copy over the B part */
1214   Aloc = (Mat_SeqAIJ*)a->B->data;
1215   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1216   row  = a->rstart;
1217   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1218   ct   = cols;
1219   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1220   for (i=0; i<m; i++) {
1221     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1222     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1223   }
1224   ierr = PetscFree(ct);CHKERRQ(ierr);
1225   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1226   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1227   if (matout) {
1228     *matout = B;
1229   } else {
1230     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1237 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1238 {
1239   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1240   Mat        a = aij->A,b = aij->B;
1241   int        ierr,s1,s2,s3;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1245   if (rr) {
1246     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1247     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1248     /* Overlap communication with computation. */
1249     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1250   }
1251   if (ll) {
1252     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1253     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1254     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1255   }
1256   /* scale  the diagonal block */
1257   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1258 
1259   if (rr) {
1260     /* Do a scatter end and then right scale the off-diagonal block */
1261     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1262     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1263   }
1264 
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1271 int MatPrintHelp_MPIAIJ(Mat A)
1272 {
1273   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1274   int        ierr;
1275 
1276   PetscFunctionBegin;
1277   if (!a->rank) {
1278     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1285 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1286 {
1287   PetscFunctionBegin;
1288   *bs = 1;
1289   PetscFunctionReturn(0);
1290 }
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1293 int MatSetUnfactored_MPIAIJ(Mat A)
1294 {
1295   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1296   int        ierr;
1297 
1298   PetscFunctionBegin;
1299   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatEqual_MPIAIJ"
1305 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1306 {
1307   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1308   Mat        a,b,c,d;
1309   PetscTruth flg;
1310   int        ierr;
1311 
1312   PetscFunctionBegin;
1313   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1314   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1315   a = matA->A; b = matA->B;
1316   c = matB->A; d = matB->B;
1317 
1318   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1319   if (flg == PETSC_TRUE) {
1320     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1321   }
1322   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatCopy_MPIAIJ"
1328 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1329 {
1330   int        ierr;
1331   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1332   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1333   PetscTruth flg;
1334 
1335   PetscFunctionBegin;
1336   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1337   if (str != SAME_NONZERO_PATTERN || !flg) {
1338     /* because of the column compression in the off-processor part of the matrix a->B,
1339        the number of columns in a->B and b->B may be different, hence we cannot call
1340        MatCopy() directly on the two parts. If need be, we can provide a more
1341        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1342        and then copying the submatrices */
1343     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1344   } else {
1345     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1346     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1347   }
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1353 int MatSetUpPreallocation_MPIAIJ(Mat A)
1354 {
1355   int        ierr;
1356 
1357   PetscFunctionBegin;
1358   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1363 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1364 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1365 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1366 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1367 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1368 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1369 #endif
1370 
1371 #include "petscblaslapack.h"
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatAXPY_MPIAIJ"
1375 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1376 {
1377   int        ierr,one = 1;
1378   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1379   Mat_SeqAIJ *x,*y;
1380 
1381   PetscFunctionBegin;
1382   if (str == SAME_NONZERO_PATTERN) {
1383     x  = (Mat_SeqAIJ *)xx->A->data;
1384     y  = (Mat_SeqAIJ *)yy->A->data;
1385     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1386     x  = (Mat_SeqAIJ *)xx->B->data;
1387     y  = (Mat_SeqAIJ *)yy->B->data;
1388     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1389   } else {
1390     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
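#if 0
/*
   Why SAME_NONZERO_PATTERN permits the flat BLAS axpy above: when X and Y have
   identical nonzero structure their a[] value arrays line up entry for entry,
   so Y += alpha*X reduces to a single pass over the values.  An illustrative
   scalar equivalent of the BLaxpy_ calls:
*/
static void FlatAXPYSketch(int nz,PetscScalar alpha,const PetscScalar *xa,PetscScalar *ya)
{
  int k;
  for (k=0; k<nz; k++) ya[k] += alpha*xa[k];
}
#endif
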
1395 /* -------------------------------------------------------------------*/
1396 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1397        MatGetRow_MPIAIJ,
1398        MatRestoreRow_MPIAIJ,
1399        MatMult_MPIAIJ,
1400        MatMultAdd_MPIAIJ,
1401        MatMultTranspose_MPIAIJ,
1402        MatMultTransposeAdd_MPIAIJ,
1403        0,
1404        0,
1405        0,
1406        0,
1407        0,
1408        0,
1409        MatRelax_MPIAIJ,
1410        MatTranspose_MPIAIJ,
1411        MatGetInfo_MPIAIJ,
1412        MatEqual_MPIAIJ,
1413        MatGetDiagonal_MPIAIJ,
1414        MatDiagonalScale_MPIAIJ,
1415        MatNorm_MPIAIJ,
1416        MatAssemblyBegin_MPIAIJ,
1417        MatAssemblyEnd_MPIAIJ,
1418        0,
1419        MatSetOption_MPIAIJ,
1420        MatZeroEntries_MPIAIJ,
1421        MatZeroRows_MPIAIJ,
1422 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1423        MatLUFactorSymbolic_MPIAIJ_TFS,
1424 #else
1425        0,
1426 #endif
1427        0,
1428        0,
1429        0,
1430        MatSetUpPreallocation_MPIAIJ,
1431        0,
1432        0,
1433        0,
1434        0,
1435        MatDuplicate_MPIAIJ,
1436        0,
1437        0,
1438        0,
1439        0,
1440        MatAXPY_MPIAIJ,
1441        MatGetSubMatrices_MPIAIJ,
1442        MatIncreaseOverlap_MPIAIJ,
1443        MatGetValues_MPIAIJ,
1444        MatCopy_MPIAIJ,
1445        MatPrintHelp_MPIAIJ,
1446        MatScale_MPIAIJ,
1447        0,
1448        0,
1449        0,
1450        MatGetBlockSize_MPIAIJ,
1451        0,
1452        0,
1453        0,
1454        0,
1455        MatFDColoringCreate_MPIAIJ,
1456        0,
1457        MatSetUnfactored_MPIAIJ,
1458        0,
1459        0,
1460        MatGetSubMatrix_MPIAIJ,
1461        MatDestroy_MPIAIJ,
1462        MatView_MPIAIJ,
1463        MatGetPetscMaps_Petsc,
1464        0,
1465        0,
1466        0,
1467        0,
1468        0,
1469        0,
1470        0,
1471        0,
1472        MatSetColoring_MPIAIJ,
1473        MatSetValuesAdic_MPIAIJ,
1474        MatSetValuesAdifor_MPIAIJ
1475 };
1476 
1477 /* ----------------------------------------------------------------------------------------*/
1478 
1479 EXTERN_C_BEGIN
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1482 int MatStoreValues_MPIAIJ(Mat mat)
1483 {
1484   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1485   int        ierr;
1486 
1487   PetscFunctionBegin;
1488   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1489   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 EXTERN_C_END
1493 
1494 EXTERN_C_BEGIN
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1497 int MatRetrieveValues_MPIAIJ(Mat mat)
1498 {
1499   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1500   int        ierr;
1501 
1502   PetscFunctionBegin;
1503   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1504   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 EXTERN_C_END
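
/*
   Illustrative sketch only (not called in this file): MatStoreValues()/MatRetrieveValues()
   simply forward to the diagonal (aij->A) and off-diagonal (aij->B) SeqAIJ parts, so a
   typical use is to save the numerical values of an assembled matrix, overwrite them,
   and restore them later.  The requirement to set MAT_NO_NEW_NONZERO_LOCATIONS first
   (so the nonzero pattern is frozen) is assumed here, as for the SeqAIJ case; the
   function name below is made up for the example.
*/
static int MatStoreRetrieveUsageSketch_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* freeze the pattern */
  ierr = MatStoreValues(A);CHKERRQ(ierr);      /* save the current values of the A and B parts */
  /* ... the numerical values of A may be changed here ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);   /* restore the saved values */
  PetscFunctionReturn(0);
}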
1508 
1509 #include "petscpc.h"
1510 EXTERN_C_BEGIN
1511 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1512 EXTERN_C_END
1513 
1514 EXTERN_C_BEGIN
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatCreate_MPIAIJ"
1517 int MatCreate_MPIAIJ(Mat B)
1518 {
1519   Mat_MPIAIJ *b;
1520   int        ierr,i,size;
1521 #if defined(PETSC_HAVE_SUPERLUDIST)
1522   PetscTruth flg;
1523 #endif
1524 
1525 
1526   PetscFunctionBegin;
1527   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1528 
1529   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1530   B->data         = (void*)b;
1531   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1532   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1533   B->factor       = 0;
1534   B->assembled    = PETSC_FALSE;
1535   B->mapping      = 0;
1536 
1537   B->insertmode      = NOT_SET_VALUES;
1538   b->size            = size;
1539   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1540 
1541   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1542   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1543 
1544   /* the information in the maps duplicates the information computed below, eventually
1545      we should remove the duplicate information that is not contained in the maps */
1546   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1547   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1548 
1549   /* build local table of row and column ownerships */
1550   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1551   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1552   b->cowners = b->rowners + b->size + 2;
1553   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1554   b->rowners[0] = 0;
1555   for (i=2; i<=b->size; i++) {
1556     b->rowners[i] += b->rowners[i-1];
1557   }
1558   b->rstart = b->rowners[b->rank];
1559   b->rend   = b->rowners[b->rank+1];
1560   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1561   b->cowners[0] = 0;
1562   for (i=2; i<=b->size; i++) {
1563     b->cowners[i] += b->cowners[i-1];
1564   }
1565   b->cstart = b->cowners[b->rank];
1566   b->cend   = b->cowners[b->rank+1];
1567 
1568   /* build cache for off-processor entries formed during assembly */
1569   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1570   b->donotstash  = PETSC_FALSE;
1571   b->colmap      = 0;
1572   b->garray      = 0;
1573   b->roworiented = PETSC_TRUE;
1574 
1575   /* stuff used for matrix vector multiply */
1576   b->lvec      = PETSC_NULL;
1577   b->Mvctx     = PETSC_NULL;
1578 
1579   /* stuff for MatGetRow() */
1580   b->rowindices   = 0;
1581   b->rowvalues    = 0;
1582   b->getrowactive = PETSC_FALSE;
1583 
1584 #if defined(PETSC_HAVE_SUPERLUDIST)
1585   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1586   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1587 #endif
1588 
1589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1590                                      "MatStoreValues_MPIAIJ",
1591                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1593                                      "MatRetrieveValues_MPIAIJ",
1594                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1596                                      "MatGetDiagonalBlock_MPIAIJ",
1597                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1598 
1599   PetscFunctionReturn(0);
1600 }
1601 EXTERN_C_END
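
/*
   Illustrative sketch only: the constructor above is invoked through MatSetType(), so
   the usual user-level creation sequence for a parallel AIJ matrix is the one below
   (the same sequence MatCreateMPIAIJ() at the end of this file performs).  The sizes
   and preallocation values are arbitrary example numbers; the function name is made
   up for the example.
*/
static int MatCreateMPIAIJUsageSketch(MPI_Comm comm,Mat *A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocate: at most 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}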
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1605 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1606 {
1607   Mat        mat;
1608   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1609   int        ierr;
1610 
1611   PetscFunctionBegin;
1612   *newmat       = 0;
1613   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1614   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1615   a    = (Mat_MPIAIJ*)mat->data;
1616   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1617   mat->factor       = matin->factor;
1618   mat->assembled    = PETSC_TRUE;
1619   mat->insertmode   = NOT_SET_VALUES;
1620   mat->preallocated = PETSC_TRUE;
1621 
1622   a->rstart       = oldmat->rstart;
1623   a->rend         = oldmat->rend;
1624   a->cstart       = oldmat->cstart;
1625   a->cend         = oldmat->cend;
1626   a->size         = oldmat->size;
1627   a->rank         = oldmat->rank;
1628   a->donotstash   = oldmat->donotstash;
1629   a->roworiented  = oldmat->roworiented;
1630   a->rowindices   = 0;
1631   a->rowvalues    = 0;
1632   a->getrowactive = PETSC_FALSE;
1633 
1634   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1635   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1636   if (oldmat->colmap) {
1637 #if defined (PETSC_USE_CTABLE)
1638     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1639 #else
1640     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1641     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1642     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1643 #endif
1644   } else a->colmap = 0;
1645   if (oldmat->garray) {
1646     int len;
1647     len  = oldmat->B->n;
1648     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1649     PetscLogObjectMemory(mat,len*sizeof(int));
1650     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1651   } else a->garray = 0;
1652 
1653   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1654   PetscLogObjectParent(mat,a->lvec);
1655   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1656   PetscLogObjectParent(mat,a->Mvctx);
1657   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1658   PetscLogObjectParent(mat,a->A);
1659   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1660   PetscLogObjectParent(mat,a->B);
1661   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1662   *newmat = mat;
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #include "petscsys.h"
1667 
1668 EXTERN_C_BEGIN
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatLoad_MPIAIJ"
1671 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1672 {
1673   Mat          A;
1674   PetscScalar  *vals,*svals;
1675   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1676   MPI_Status   status;
1677   int          i,nz,ierr,j,rstart,rend,fd;
1678   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1679   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1680   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1681 
1682   PetscFunctionBegin;
1683   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1684   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1685   if (!rank) {
1686     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1687     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1688     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1689     if (header[3] < 0) {
1690       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1691     }
1692   }
1693 
1694   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1695   M = header[1]; N = header[2];
1696   /* determine ownership of all rows */
1697   m = M/size + ((M % size) > rank);
1698   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1699   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1700   rowners[0] = 0;
1701   for (i=2; i<=size; i++) {
1702     rowners[i] += rowners[i-1];
1703   }
1704   rstart = rowners[rank];
1705   rend   = rowners[rank+1];
1706 
1707   /* distribute row lengths to all processors */
1708   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1709   offlens = ourlens + (rend-rstart);
1710   if (!rank) {
1711     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1712     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1713     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1714     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1715     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1716     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1717   } else {
1718     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1719   }
1720 
1721   if (!rank) {
1722     /* calculate the number of nonzeros on each processor */
1723     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1724     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1725     for (i=0; i<size; i++) {
1726       for (j=rowners[i]; j< rowners[i+1]; j++) {
1727         procsnz[i] += rowlengths[j];
1728       }
1729     }
1730     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1731 
1732     /* determine max buffer needed and allocate it */
1733     maxnz = 0;
1734     for (i=0; i<size; i++) {
1735       maxnz = PetscMax(maxnz,procsnz[i]);
1736     }
1737     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1738 
1739     /* read in my part of the matrix column indices  */
1740     nz   = procsnz[0];
1741     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1742     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1743 
1744     /* read in everyone else's part and ship it off */
1745     for (i=1; i<size; i++) {
1746       nz   = procsnz[i];
1747       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1748       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1749     }
1750     ierr = PetscFree(cols);CHKERRQ(ierr);
1751   } else {
1752     /* determine buffer space needed for message */
1753     nz = 0;
1754     for (i=0; i<m; i++) {
1755       nz += ourlens[i];
1756     }
1757     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1758 
1759     /* receive message of column indices */
1760     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1761     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1762     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1763   }
1764 
1765   /* determine column ownership if matrix is not square */
1766   if (N != M) {
1767     n      = N/size + ((N % size) > rank);
1768     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1769     cstart = cend - n;
1770   } else {
1771     cstart = rstart;
1772     cend   = rend;
1773     n      = cend - cstart;
1774   }
1775 
1776   /* loop over local rows, determining number of off-diagonal entries */
1777   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1778   jj = 0;
1779   for (i=0; i<m; i++) {
1780     for (j=0; j<ourlens[i]; j++) {
1781       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1782       jj++;
1783     }
1784   }
1785 
1786   /* create our matrix */
1787   for (i=0; i<m; i++) {
1788     ourlens[i] -= offlens[i];
1789   }
1790   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1791   A = *newmat;
1792   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1793   for (i=0; i<m; i++) {
1794     ourlens[i] += offlens[i];
1795   }
1796 
1797   if (!rank) {
1798     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1799 
1800     /* read in my part of the matrix numerical values  */
1801     nz   = procsnz[0];
1802     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1803 
1804     /* insert into matrix */
1805     jj      = rstart;
1806     smycols = mycols;
1807     svals   = vals;
1808     for (i=0; i<m; i++) {
1809       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1810       smycols += ourlens[i];
1811       svals   += ourlens[i];
1812       jj++;
1813     }
1814 
1815     /* read in other processors and ship out */
1816     for (i=1; i<size; i++) {
1817       nz   = procsnz[i];
1818       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1819       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1820     }
1821     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1822   } else {
1823     /* receive numeric values */
1824     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1825 
1826     /* receive message of values */
1827     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1828     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1829     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1830 
1831     /* insert into matrix */
1832     jj      = rstart;
1833     smycols = mycols;
1834     svals   = vals;
1835     for (i=0; i<m; i++) {
1836       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1837       smycols += ourlens[i];
1838       svals   += ourlens[i];
1839       jj++;
1840     }
1841   }
1842   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1843   ierr = PetscFree(vals);CHKERRQ(ierr);
1844   ierr = PetscFree(mycols);CHKERRQ(ierr);
1845   ierr = PetscFree(rowners);CHKERRQ(ierr);
1846 
1847   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1848   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 EXTERN_C_END
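
/*
   Illustrative sketch only: MatLoad_MPIAIJ() above is invoked through the generic
   MatLoad() interface.  Given a binary viewer already opened for reading (for example
   with PetscViewerBinaryOpen(); the viewer creation is omitted here), process 0 reads
   the header, row lengths, column indices and values, distributes them as computed
   above, and the assembled parallel matrix is returned.  The function name below is
   made up for the example.
*/
static int MatLoadUsageSketch_MPIAIJ(PetscViewer viewer,Mat *A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}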
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1855 /*
1856     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1857   on each process, and then the final result formed by concatenating these local
1858   matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ()
1859 */
1860 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1861 {
1862   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1863   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1864   Mat          *local,M,Mreuse;
1865   PetscScalar  *vwork,*aa;
1866   MPI_Comm     comm = mat->comm;
1867   Mat_SeqAIJ   *aij;
1868 
1869 
1870   PetscFunctionBegin;
1871   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1873 
1874   if (call ==  MAT_REUSE_MATRIX) {
1875     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1876     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1877     local = &Mreuse;
1878     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1879   } else {
1880     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1881     Mreuse = *local;
1882     ierr   = PetscFree(local);CHKERRQ(ierr);
1883   }
1884 
1885   /*
1886       m - number of local rows
1887       n - number of columns (same on all processors)
1888       rstart - first row in new global matrix generated
1889   */
1890   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1891   if (call == MAT_INITIAL_MATRIX) {
1892     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1893     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1894     ii  = aij->i;
1895     jj  = aij->j;
1896 
1897     /*
1898         Determine the number of non-zeros in the diagonal and off-diagonal
1899         portions of the matrix in order to do correct preallocation
1900     */
1901 
1902     /* first get start and end of "diagonal" columns */
1903     if (csize == PETSC_DECIDE) {
1904       nlocal = n/size + ((n % size) > rank);
1905     } else {
1906       nlocal = csize;
1907     }
1908     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1909     rstart = rend - nlocal;
1910     if (rank == size - 1 && rend != n) {
1911       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1912     }
1913 
1914     /* next, compute all the lengths */
1915     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1916     olens = dlens + m;
1917     for (i=0; i<m; i++) {
1918       jend = ii[i+1] - ii[i];
1919       olen = 0;
1920       dlen = 0;
1921       for (j=0; j<jend; j++) {
1922         if (*jj < rstart || *jj >= rend) olen++;
1923         else dlen++;
1924         jj++;
1925       }
1926       olens[i] = olen;
1927       dlens[i] = dlen;
1928     }
1929     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1930     ierr = PetscFree(dlens);CHKERRQ(ierr);
1931   } else {
1932     int ml,nl;
1933 
1934     M = *newmat;
1935     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1936     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1937     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1938     /*
1939          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1940        rather than the slower MatSetValues().
1941     */
1942     M->was_assembled = PETSC_TRUE;
1943     M->assembled     = PETSC_FALSE;
1944   }
1945   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1946   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1947   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1948   ii  = aij->i;
1949   jj  = aij->j;
1950   aa  = aij->a;
1951   for (i=0; i<m; i++) {
1952     row   = rstart + i;
1953     nz    = ii[i+1] - ii[i];
1954     cwork = jj;     jj += nz;
1955     vwork = aa;     aa += nz;
1956     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1957   }
1958 
1959   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1960   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961   *newmat = M;
1962 
1963   /* save submatrix used in processor for next request */
1964   if (call ==  MAT_INITIAL_MATRIX) {
1965     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1966     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1967   }
1968 
1969   PetscFunctionReturn(0);
1970 }
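
/*
   Illustrative sketch only: extracting the locally owned rows (and all columns) of a
   parallel AIJ matrix through the generic MatGetSubMatrix() interface, which dispatches
   to the routine above.  csize = PETSC_DECIDE lets each process take its default share
   of the submatrix columns; MAT_REUSE_MATRIX could be passed on later calls to refill
   the same submatrix.  The function name below is made up for the example.
*/
static int MatGetSubMatrixUsageSketch_MPIAIJ(Mat mat,Mat *sub)
{
  int ierr,rstart,rend,M,N;
  IS  isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr); /* my rows */
  ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);                /* all columns */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}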
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1974 /*@C
1975    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1976    (the default parallel PETSc format).  For good matrix assembly performance
1977    the user should preallocate the matrix storage by setting the parameters
1978    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1979    performance can be increased by more than a factor of 50.
1980 
1981    Collective on MPI_Comm
1982 
1983    Input Parameters:
1984 +  A - the matrix
1985 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1986            (same value is used for all local rows)
1987 .  d_nnz - array containing the number of nonzeros in the various rows of the
1988            DIAGONAL portion of the local submatrix (possibly different for each row)
1989            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1990            The size of this array is equal to the number of local rows, i.e 'm'.
1991            You must leave room for the diagonal entry even if it is zero.
1992 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1993            submatrix (same value is used for all local rows).
1994 -  o_nnz - array containing the number of nonzeros in the various rows of the
1995            OFF-DIAGONAL portion of the local submatrix (possibly different for
1996            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1997            structure. The size of this array is equal to the number
1998            of local rows, i.e., 'm'.
1999 
2000    The AIJ format (also called the Yale sparse matrix format or
2001    compressed row storage), is fully compatible with standard Fortran 77
2002    storage.  That is, the stored row and column indices can begin at
2003    either one (as in Fortran) or zero.  See the users manual for details.
2004 
2005    The user MUST specify either the local or global matrix dimensions
2006    (possibly both).
2007 
2008    The parallel matrix is partitioned such that the first m0 rows belong to
2009    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2010    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2011 
2012    The DIAGONAL portion of the local submatrix of a processor can be defined
2013    as the submatrix which is obtained by extracting the part corresponding
2014    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2015    first row that belongs to the processor, and r2 is the last row belonging
2016    to this processor. This is a square mxm matrix. The remaining portion
2017    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2018 
2019    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2020 
2021    By default, this format uses inodes (identical nodes) when possible.
2022    We search for consecutive rows with the same nonzero structure, thereby
2023    reusing matrix information to achieve increased efficiency.
2024 
2025    Options Database Keys:
2026 +  -mat_aij_no_inode  - Do not use inodes
2027 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2028 -  -mat_aij_oneindex - Internally use indexing starting at 1
2029         rather than 0.  Note that when calling MatSetValues(),
2030         the user still MUST index entries starting at 0!
2031 
2032    Example usage:
2033 
2034    Consider the following 8x8 matrix with 34 non-zero values that is
2035    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2036    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2037    as follows:
2038 
2039 .vb
2040             1  2  0  |  0  3  0  |  0  4
2041     Proc0   0  5  6  |  7  0  0  |  8  0
2042             9  0 10  | 11  0  0  | 12  0
2043     -------------------------------------
2044            13  0 14  | 15 16 17  |  0  0
2045     Proc1   0 18  0  | 19 20 21  |  0  0
2046             0  0  0  | 22 23  0  | 24  0
2047     -------------------------------------
2048     Proc2  25 26 27  |  0  0 28  | 29  0
2049            30  0  0  | 31 32 33  |  0 34
2050 .ve
2051 
2052    This can be represented as a collection of submatrices as:
2053 
2054 .vb
2055       A B C
2056       D E F
2057       G H I
2058 .ve
2059 
2060    Where the submatrices A,B,C are owned by proc0, D,E,F are
2061    owned by proc1, G,H,I are owned by proc2.
2062 
2063    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2064    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2065    The 'M','N' parameters are 8,8, and have the same values on all procs.
2066 
2067    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2068    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2069    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2070    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2071    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2072    matrix, and [DF] as another SeqAIJ matrix.
2073 
2074    When d_nz, o_nz parameters are specified, d_nz storage elements are
2075    allocated for every row of the local diagonal submatrix, and o_nz
2076    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2077    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2078    per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2079    In this case, the values of d_nz,o_nz are:
2080 .vb
2081      proc0 : d_nz = 2, o_nz = 2
2082      proc1 : d_nz = 3, o_nz = 2
2083      proc2 : d_nz = 1, o_nz = 4
2084 .ve
2085    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2086    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2087    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2088    34 values.
2089 
2090    When d_nnz, o_nnz parameters are specified, the storage is specified
2091    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2092    In the above case the values for d_nnz,o_nnz are:
2093 .vb
2094      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2095      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2096      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2097 .ve
2098    Here the space allocated is the sum of all the above values, i.e., 34, and
2099    hence pre-allocation is perfect.
2100 
2101    Level: intermediate
2102 
2103 .keywords: matrix, aij, compressed row, sparse, parallel
2104 
2105 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2106 @*/
2107 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2108 {
2109   Mat_MPIAIJ   *b;
2110   int          ierr,i;
2111   PetscTruth   flg2;
2112 
2113   PetscFunctionBegin;
2114   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2115   if (!flg2) PetscFunctionReturn(0);
2116   B->preallocated = PETSC_TRUE;
2117   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2118   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2119   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2120   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2121   if (d_nnz) {
2122     for (i=0; i<B->m; i++) {
2123       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2124     }
2125   }
2126   if (o_nnz) {
2127     for (i=0; i<B->m; i++) {
2128       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2129     }
2130   }
2131   b = (Mat_MPIAIJ*)B->data;
2132 
2133   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2134   PetscLogObjectParent(B,b->A);
2135   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2136   PetscLogObjectParent(B,b->B);
2137 
2138   PetscFunctionReturn(0);
2139 }
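
/*
   Illustrative sketch only: exact per-row preallocation with the d_nnz/o_nnz arrays.
   For the 8x8 example in the manual page above, proc0 would call this with m=n=3,
   d_nnz={2,2,2}, o_nnz={2,2,2}; proc1 with m=n=3, d_nnz={3,3,2}, o_nnz={2,1,1};
   and proc2 with m=n=2, d_nnz={1,1}, o_nnz={4,4}.  When the arrays are supplied the
   scalar d_nz/o_nz values are ignored.  The function name below is made up for the
   example.
*/
static int MatMPIAIJPreallocationSketch(MPI_Comm comm,int m,int n,int *d_nnz,int *o_nnz,Mat *A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,8,8,A);CHKERRQ(ierr);   /* local sizes m,n; 8x8 global matrix of the example */
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}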
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "MatCreateMPIAIJ"
2143 /*@C
2144    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2145    (the default parallel PETSc format).  For good matrix assembly performance
2146    the user should preallocate the matrix storage by setting the parameters
2147    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2148    performance can be increased by more than a factor of 50.
2149 
2150    Collective on MPI_Comm
2151 
2152    Input Parameters:
2153 +  comm - MPI communicator
2154 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2155            This value should be the same as the local size used in creating the
2156            y vector for the matrix-vector product y = Ax.
2157 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2158        This value should be the same as the local size used in creating the
2159        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2160 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2161 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2162 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2163            (same value is used for all local rows)
2164 .  d_nnz - array containing the number of nonzeros in the various rows of the
2165            DIAGONAL portion of the local submatrix (possibly different for each row)
2166            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2167            The size of this array is equal to the number of local rows, i.e., 'm'.
2168            You must leave room for the diagonal entry even if it is zero.
2169 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2170            submatrix (same value is used for all local rows).
2171 -  o_nnz - array containing the number of nonzeros in the various rows of the
2172            OFF-DIAGONAL portion of the local submatrix (possibly different for
2173            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2174            structure. The size of this array is equal to the number
2175            of local rows, i.e., 'm'.
2176 
2177    Output Parameter:
2178 .  A - the matrix
2179 
2180    Notes:
2181    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2182    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2183    storage requirements for this matrix.
2184 
2185    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2186    processor then it must be used on all processors that share the object for
2187    that argument.
2188 
2189    The AIJ format (also called the Yale sparse matrix format or
2190    compressed row storage), is fully compatible with standard Fortran 77
2191    storage.  That is, the stored row and column indices can begin at
2192    either one (as in Fortran) or zero.  See the users manual for details.
2193 
2194    The user MUST specify either the local or global matrix dimensions
2195    (possibly both).
2196 
2197    The parallel matrix is partitioned such that the first m0 rows belong to
2198    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2199    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2200 
2201    The DIAGONAL portion of the local submatrix of a processor can be defined
2202    as the submatrix which is obtained by extracting the part corresponding
2203    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2204    first row that belongs to the processor, and r2 is the last row belonging
2205    to this processor. This is a square mxm matrix. The remaining portion
2206    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2207 
2208    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2209 
2210    By default, this format uses inodes (identical nodes) when possible.
2211    We search for consecutive rows with the same nonzero structure, thereby
2212    reusing matrix information to achieve increased efficiency.
2213 
2214    Options Database Keys:
2215 +  -mat_aij_no_inode  - Do not use inodes
2216 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2217 -  -mat_aij_oneindex - Internally use indexing starting at 1
2218         rather than 0.  Note that when calling MatSetValues(),
2219         the user still MUST index entries starting at 0!
2220 
2221 
2222    Example usage:
2223 
2224    Consider the following 8x8 matrix with 34 non-zero values that is
2225    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2226    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2227    as follows:
2228 
2229 .vb
2230             1  2  0  |  0  3  0  |  0  4
2231     Proc0   0  5  6  |  7  0  0  |  8  0
2232             9  0 10  | 11  0  0  | 12  0
2233     -------------------------------------
2234            13  0 14  | 15 16 17  |  0  0
2235     Proc1   0 18  0  | 19 20 21  |  0  0
2236             0  0  0  | 22 23  0  | 24  0
2237     -------------------------------------
2238     Proc2  25 26 27  |  0  0 28  | 29  0
2239            30  0  0  | 31 32 33  |  0 34
2240 .ve
2241 
2242    This can be represented as a collection of submatrices as:
2243 
2244 .vb
2245       A B C
2246       D E F
2247       G H I
2248 .ve
2249 
2250    Where the submatrices A,B,C are owned by proc0, D,E,F are
2251    owned by proc1, G,H,I are owned by proc2.
2252 
2253    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2254    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2255    The 'M','N' parameters are 8,8, and have the same values on all procs.
2256 
2257    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2258    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2259    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2260    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2261    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2262    matrix, and [DF] as another SeqAIJ matrix.
2263 
2264    When d_nz, o_nz parameters are specified, d_nz storage elements are
2265    allocated for every row of the local diagonal submatrix, and o_nz
2266    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2267    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2268    per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2269    In this case, the values of d_nz,o_nz are:
2270 .vb
2271      proc0 : d_nz = 2, o_nz = 2
2272      proc1 : d_nz = 3, o_nz = 2
2273      proc2 : d_nz = 1, o_nz = 4
2274 .ve
2275    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2276    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2277    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2278    34 values.
2279 
2280    When d_nnz, o_nnz parameters are specified, the storage is specified
2281    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2282    In the above case the values for d_nnz,o_nnz are:
2283 .vb
2284      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2285      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2286      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2287 .ve
2288    Here the space allocated is the sum of all the above values, i.e., 34, and
2289    hence pre-allocation is perfect.
2290 
2291    Level: intermediate
2292 
2293 .keywords: matrix, aij, compressed row, sparse, parallel
2294 
2295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2296 @*/
2297 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2298 {
2299   int ierr,size;
2300 
2301   PetscFunctionBegin;
2302   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2303   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2304   if (size > 1) {
2305     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2306     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2307   } else {
2308     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2309     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2310   }
2311   PetscFunctionReturn(0);
2312 }
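
/*
   Illustrative sketch only: the convenience constructor above followed by the usual
   assembly loop.  Each process sets values in the rows it owns (any process may set
   any entry, but staying within the owned rows avoids stashing and communication);
   the values are communicated and the parallel structure built during
   MatAssemblyBegin/End().  The tridiagonal stencil and the function name are made up
   for the example.
*/
static int MatCreateMPIAIJAssemblySketch(MPI_Comm comm,int n,Mat *A)
{
  int         ierr,i,rstart,rend,cols[3];
  PetscScalar vals[3] = {-1.0,2.0,-1.0};

  PetscFunctionBegin;
  /* at most 3 nonzeros per row in the diagonal block and 1 in the off-diagonal block */
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,n,n,3,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    if (i == 0 || i == n-1) {            /* boundary rows: only the diagonal entry */
      ierr = MatSetValues(*A,1,&i,1,&i,&vals[1],INSERT_VALUES);CHKERRQ(ierr);
    } else {
      cols[0] = i-1; cols[1] = i; cols[2] = i+1;
      ierr = MatSetValues(*A,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}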
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2316 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2317 {
2318   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2319   PetscFunctionBegin;
2320   *Ad     = a->A;
2321   *Ao     = a->B;
2322   *colmap = a->garray;
2323   PetscFunctionReturn(0);
2324 }
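
/*
   Illustrative sketch only: the accessor above hands back the diagonal part (Ad), the
   column-compressed off-diagonal part (Ao) and the garray that maps a local column
   number of Ao back to a global column number of the parallel matrix; all three are
   meaningful only after the matrix has been assembled.  The sketch prints, for the
   first locally owned row, the global column of each off-diagonal entry.  The function
   name below is made up for the example.
*/
static int MatMPIAIJGetSeqAIJUsageSketch(Mat A)
{
  Mat         Ad,Ao;       /* Ad is the diagonal block; only Ao is used below */
  int         ierr,ncols,*cols,j,*garray;
  PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatGetRow(Ao,0,&ncols,&cols,&vals);CHKERRQ(ierr);  /* row 0 of the local off-diagonal part */
  for (j=0; j<ncols; j++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"off-diagonal entry in global column %d\n",garray[cols[j]]);CHKERRQ(ierr);
  }
  ierr = MatRestoreRow(Ao,0,&ncols,&cols,&vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}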
2325 
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2328 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2329 {
2330   int        ierr;
2331   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2332 
2333   PetscFunctionBegin;
2334   if (coloring->ctype == IS_COLORING_LOCAL) {
2335     int        *allcolors,*colors,i;
2336     ISColoring ocoloring;
2337 
2338     /* set coloring for diagonal portion */
2339     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2340 
2341     /* set coloring for off-diagonal portion */
2342     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2343     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2344     for (i=0; i<a->B->n; i++) {
2345       colors[i] = allcolors[a->garray[i]];
2346     }
2347     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2348     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2349     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2350     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2351   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2352     int        *colors,i,*larray;
2353     ISColoring ocoloring;
2354 
2355     /* set coloring for diagonal portion */
2356     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2357     for (i=0; i<a->A->n; i++) {
2358       larray[i] = i + a->cstart;
2359     }
2360     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2361     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2362     for (i=0; i<a->A->n; i++) {
2363       colors[i] = coloring->colors[larray[i]];
2364     }
2365     ierr = PetscFree(larray);CHKERRQ(ierr);
2366     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2367     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2368     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2369 
2370     /* set coloring for off-diagonal portion */
2371     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2372     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2373     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2374     for (i=0; i<a->B->n; i++) {
2375       colors[i] = coloring->colors[larray[i]];
2376     }
2377     ierr = PetscFree(larray);CHKERRQ(ierr);
2378     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2379     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2380     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2381   } else {
2382     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2383   }
2384 
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2390 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2391 {
2392   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2393   int        ierr;
2394 
2395   PetscFunctionBegin;
2396   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2397   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2403 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2404 {
2405   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2406   int        ierr;
2407 
2408   PetscFunctionBegin;
2409   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2410   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413