1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
19 at a slightly higher hash-table lookup cost; without it, it is not scalable
20 (each processor holds an order N integer array) but access is fast.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
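
/*
   Illustrative sketch (not compiled; gcol and lcol are placeholder names): once
   the colmap exists, a global column number gcol belonging to the off-diagonal
   part is translated to its local index lcol the same way MatSetValues_MPIAIJ()
   does below; a result of -1 means gcol is not present in the off-diagonal block.

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                                 (table stores local index + 1)
   #else
     lcol = aij->colmap[gcol] - 1;           (array stores local index + 1)
   #endif
*/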
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some variables required by the macros above */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
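
/*
   Caller-side sketch (hedged; standard PETSc usage, not taken from this file;
   row, col and val are placeholders).  MatSetValues() may be passed any global
   indices: rows owned by this process are routed into the diagonal (aij->A) or
   off-diagonal (aij->B) block above, while rows owned elsewhere are stashed and
   communicated during assembly.

     int         row = 0,col = 0;
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/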
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col --;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
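
/*
   Caller-side sketch (hedged; grow and gcol are placeholders): MatGetValues()
   on an MPIAIJ matrix only supports rows owned by the calling process, as
   enforced above (rstart <= grow < rend).

     int         grow,gcol;                  (global row and column indices)
     PetscScalar val;
     ierr = MatGetValues(mat,1,&grow,1,&gcol,&val);CHKERRQ(ierr);
*/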
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the data into the send scatter */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal we use separate
540      code so that MatSetValues() is not called for each diagonal entry,
541      allocating new memory and thus causing many mallocs and slowing things down.
542 
543        Contributed by: Matthew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
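
/*
   Caller-side sketch (hedged; rows and diag are placeholders): zeroing a set of
   global rows and putting a value on the diagonal.  The IS may contain rows
   owned by any process; the send/receive code above routes them to their owners.

     IS          is;
     int         rows[2] = {0,5};
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(comm,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/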
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
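
/*
   Note (summarizing the code above): the parallel product is formed as
       yy = a->A * xx  +  a->B * a->lvec
   where a->lvec holds the ghost (off-processor) entries of xx gathered by the
   VecScatter; starting the scatter before the diagonal-block multiply lets the
   communication overlap with the local computation.
*/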
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* inserted into yy until the next line, which is true for this implementation */
629   /* but may not always be true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually */
649   /* inserted into zz until the next line, which is true for this implementation */
650   /* but may not always be true. */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the local subblock a->A is
657   the diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
722   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
723   PetscTruth        isdraw,isascii,flg;
724   PetscViewer       sviewer;
725   PetscViewerFormat format;
726 
727   PetscFunctionBegin;
728   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
730   if (isascii) {
731     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
732     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
733       MatInfo info;
734       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
735       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
737       if (flg) {
738         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
739 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
740       } else {
741         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
742 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
743       }
744       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
749       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
750       PetscFunctionReturn(0);
751     } else if (format == PETSC_VIEWER_ASCII_INFO) {
752       PetscFunctionReturn(0);
753     }
754   } else if (isdraw) {
755     PetscDraw       draw;
756     PetscTruth isnull;
757     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
758     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
759   }
760 
761   if (size == 1) {
762     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
763     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
764   } else {
765     /* assemble the entire matrix onto first processor. */
766     Mat         A;
767     Mat_SeqAIJ *Aloc;
768     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
769     PetscScalar *a;
770 
771     if (!rank) {
772       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     } else {
774       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
775     }
776     PetscLogObjectParent(mat,A);
777 
778     /* copy over the A part */
779     Aloc = (Mat_SeqAIJ*)aij->A->data;
780     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781     row = aij->rstart;
782     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
783     for (i=0; i<m; i++) {
784       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
785       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
786     }
787     aj = Aloc->j;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
789 
790     /* copy over the B part */
791     Aloc = (Mat_SeqAIJ*)aij->B->data;
792     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
793     row  = aij->rstart;
794     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
795     ct   = cols;
796     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
797     for (i=0; i<m; i++) {
798       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800     }
801     ierr = PetscFree(ct);CHKERRQ(ierr);
802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     /*
805        Everyone has to participate in the call that draws the matrix, since the
806        graphics waits are synchronized across all processors that share the PetscDraw object
807     */
808     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
809     if (!rank) {
810       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
811       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
812     }
813     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     ierr = MatDestroy(A);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatView_MPIAIJ"
821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
822 {
823   int        ierr;
824   PetscTruth isascii,isdraw,issocket,isbinary;
825 
826   PetscFunctionBegin;
827   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
828   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
829   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
830   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
831   if (isascii || isdraw || isbinary || issocket) {
832     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
833   } else {
834     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatRelax_MPIAIJ"
843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
844 {
845   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
846   int          ierr;
847   Vec          bb1;
848   PetscScalar  mone=-1.0;
849 
850   PetscFunctionBegin;
851   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
852 
853   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
854 
855   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
856     if (flag & SOR_ZERO_INITIAL_GUESS) {
857       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
858       its--;
859     }
860 
861     while (its--) {
862       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
863       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
864 
865       /* update rhs: bb1 = bb - B*x */
866       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
867       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
868 
869       /* local sweep */
870       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
871       CHKERRQ(ierr);
872     }
873   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
876       its--;
877     }
878     while (its--) {
879       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881 
882       /* update rhs: bb1 = bb - B*x */
883       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
884       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
885 
886       /* local sweep */
887       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888       CHKERRQ(ierr);
889     }
890   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
893       its--;
894     }
895     while (its--) {
896       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898 
899       /* update rhs: bb1 = bb - B*x */
900       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
901       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
902 
903       /* local sweep */
904       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
905       CHKERRQ(ierr);
906     }
907   } else {
908     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
909   }
910 
911   ierr = VecDestroy(bb1);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
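
/*
   Note (summarizing the sweeps above): each outer iteration gathers the ghost
   entries of xx into mat->lvec, forms the modified right-hand side
       bb1 = bb - B * lvec
   (by scaling lvec by -1 and using MatMultAdd with the off-diagonal block) and
   then applies lits local SOR sweeps with the diagonal block A to bb1.
*/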
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatGetInfo_MPIAIJ"
917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
918 {
919   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
920   Mat        A = mat->A,B = mat->B;
921   int        ierr;
922   PetscReal  isend[5],irecv[5];
923 
924   PetscFunctionBegin;
925   info->block_size     = 1.0;
926   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
927   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
928   isend[3] = info->memory;  isend[4] = info->mallocs;
929   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
930   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
931   isend[3] += info->memory;  isend[4] += info->mallocs;
932   if (flag == MAT_LOCAL) {
933     info->nz_used      = isend[0];
934     info->nz_allocated = isend[1];
935     info->nz_unneeded  = isend[2];
936     info->memory       = isend[3];
937     info->mallocs      = isend[4];
938   } else if (flag == MAT_GLOBAL_MAX) {
939     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
940     info->nz_used      = irecv[0];
941     info->nz_allocated = irecv[1];
942     info->nz_unneeded  = irecv[2];
943     info->memory       = irecv[3];
944     info->mallocs      = irecv[4];
945   } else if (flag == MAT_GLOBAL_SUM) {
946     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
947     info->nz_used      = irecv[0];
948     info->nz_allocated = irecv[1];
949     info->nz_unneeded  = irecv[2];
950     info->memory       = irecv[3];
951     info->mallocs      = irecv[4];
952   }
953   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
954   info->fill_ratio_needed = 0;
955   info->factor_mallocs    = 0;
956   info->rows_global       = (double)matin->M;
957   info->columns_global    = (double)matin->N;
958   info->rows_local        = (double)matin->m;
959   info->columns_local     = (double)matin->N;
960 
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatSetOption_MPIAIJ"
966 int MatSetOption_MPIAIJ(Mat A,MatOption op)
967 {
968   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
969   int        ierr;
970 
971   PetscFunctionBegin;
972   switch (op) {
973   case MAT_NO_NEW_NONZERO_LOCATIONS:
974   case MAT_YES_NEW_NONZERO_LOCATIONS:
975   case MAT_COLUMNS_UNSORTED:
976   case MAT_COLUMNS_SORTED:
977   case MAT_NEW_NONZERO_ALLOCATION_ERR:
978   case MAT_KEEP_ZEROED_ROWS:
979   case MAT_NEW_NONZERO_LOCATION_ERR:
980   case MAT_USE_INODES:
981   case MAT_DO_NOT_USE_INODES:
982   case MAT_IGNORE_ZERO_ENTRIES:
983     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
984     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
985     break;
986   case MAT_ROW_ORIENTED:
987     a->roworiented = PETSC_TRUE;
988     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
989     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
990     break;
991   case MAT_ROWS_SORTED:
992   case MAT_ROWS_UNSORTED:
993   case MAT_YES_NEW_DIAGONALS:
994   case MAT_USE_SINGLE_PRECISION_SOLVES:
995     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
996     break;
997   case MAT_COLUMN_ORIENTED:
998     a->roworiented = PETSC_FALSE;
999     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1000     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1001     break;
1002   case MAT_IGNORE_OFF_PROC_ENTRIES:
1003     a->donotstash = PETSC_TRUE;
1004     break;
1005   case MAT_NO_NEW_DIAGONALS:
1006     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1007     break;
1008   default:
1009     SETERRQ(PETSC_ERR_SUP,"unknown option");
1010     break;
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPIAIJ"
1017 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1018 {
1019   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1021   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1022   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1023   int          *cmap,*idx_p;
1024 
1025   PetscFunctionBegin;
1026   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1027   mat->getrowactive = PETSC_TRUE;
1028 
1029   if (!mat->rowvalues && (idx || v)) {
1030     /*
1031         allocate enough space to hold information from the longest row.
1032     */
1033     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1034     int     max = 1,tmp;
1035     for (i=0; i<matin->m; i++) {
1036       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1037       if (max < tmp) { max = tmp; }
1038     }
1039     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1040     mat->rowindices = (int*)(mat->rowvalues + max);
1041   }
1042 
1043   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1044   lrow = row - rstart;
1045 
1046   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1047   if (!v)   {pvA = 0; pvB = 0;}
1048   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1049   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1050   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1051   nztot = nzA + nzB;
1052 
1053   cmap  = mat->garray;
1054   if (v  || idx) {
1055     if (nztot) {
1056       /* Sort by increasing column numbers, assuming A and B already sorted */
1057       int imark = -1;
1058       if (v) {
1059         *v = v_p = mat->rowvalues;
1060         for (i=0; i<nzB; i++) {
1061           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1062           else break;
1063         }
1064         imark = i;
1065         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1066         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1067       }
1068       if (idx) {
1069         *idx = idx_p = mat->rowindices;
1070         if (imark > -1) {
1071           for (i=0; i<imark; i++) {
1072             idx_p[i] = cmap[cworkB[i]];
1073           }
1074         } else {
1075           for (i=0; i<nzB; i++) {
1076             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1077             else break;
1078           }
1079           imark = i;
1080         }
1081         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1082         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1083       }
1084     } else {
1085       if (idx) *idx = 0;
1086       if (v)   *v   = 0;
1087     }
1088   }
1089   *nz = nztot;
1090   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1091   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
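
/*
   Caller-side sketch (hedged; row is a placeholder for a locally owned global
   row, rstart <= row < rend): column indices are returned in global numbering,
   merged in increasing order from the diagonal and off-diagonal parts.

     int         nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     (use nz, cols[], vals[])
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/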
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1097 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1098 {
1099   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1100 
1101   PetscFunctionBegin;
1102   if (aij->getrowactive == PETSC_FALSE) {
1103     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1104   }
1105   aij->getrowactive = PETSC_FALSE;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatNorm_MPIAIJ"
1111 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1112 {
1113   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1114   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1115   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1116   PetscReal    sum = 0.0;
1117   PetscScalar  *v;
1118 
1119   PetscFunctionBegin;
1120   if (aij->size == 1) {
1121     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1122   } else {
1123     if (type == NORM_FROBENIUS) {
1124       v = amat->a;
1125       for (i=0; i<amat->nz; i++) {
1126 #if defined(PETSC_USE_COMPLEX)
1127         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1128 #else
1129         sum += (*v)*(*v); v++;
1130 #endif
1131       }
1132       v = bmat->a;
1133       for (i=0; i<bmat->nz; i++) {
1134 #if defined(PETSC_USE_COMPLEX)
1135         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1136 #else
1137         sum += (*v)*(*v); v++;
1138 #endif
1139       }
1140       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1141       *norm = sqrt(*norm);
1142     } else if (type == NORM_1) { /* max column norm */
1143       PetscReal *tmp,*tmp2;
1144       int    *jj,*garray = aij->garray;
1145       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1146       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1147       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1148       *norm = 0.0;
1149       v = amat->a; jj = amat->j;
1150       for (j=0; j<amat->nz; j++) {
1151         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1152       }
1153       v = bmat->a; jj = bmat->j;
1154       for (j=0; j<bmat->nz; j++) {
1155         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1156       }
1157       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1158       for (j=0; j<mat->N; j++) {
1159         if (tmp2[j] > *norm) *norm = tmp2[j];
1160       }
1161       ierr = PetscFree(tmp);CHKERRQ(ierr);
1162       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1163     } else if (type == NORM_INFINITY) { /* max row norm */
1164       PetscReal ntemp = 0.0;
1165       for (j=0; j<aij->A->m; j++) {
1166         v = amat->a + amat->i[j] + shift;
1167         sum = 0.0;
1168         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1169           sum += PetscAbsScalar(*v); v++;
1170         }
1171         v = bmat->a + bmat->i[j] + shift;
1172         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1173           sum += PetscAbsScalar(*v); v++;
1174         }
1175         if (sum > ntemp) ntemp = sum;
1176       }
1177       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1178     } else {
1179       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1180     }
1181   }
1182   PetscFunctionReturn(0);
1183 }
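
/*
   Note (the norms computed above, in formula form):
     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|     (largest column sum)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|     (largest row sum)
   Each process accumulates over its diagonal and off-diagonal blocks and the
   results are combined with MPI_Allreduce.
*/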
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPIAIJ"
1187 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1188 {
1189   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1190   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1191   int          ierr,shift = Aloc->indexshift;
1192   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1193   Mat          B;
1194   PetscScalar  *array;
1195 
1196   PetscFunctionBegin;
1197   if (!matout && M != N) {
1198     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1199   }
1200 
1201   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1202 
1203   /* copy over the A part */
1204   Aloc = (Mat_SeqAIJ*)a->A->data;
1205   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1206   row = a->rstart;
1207   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1208   for (i=0; i<m; i++) {
1209     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1210     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1211   }
1212   aj = Aloc->j;
1213   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1214 
1215   /* copy over the B part */
1216   Aloc = (Mat_SeqAIJ*)a->B->data;
1217   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1218   row  = a->rstart;
1219   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1220   ct   = cols;
1221   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1222   for (i=0; i<m; i++) {
1223     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1224     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1225   }
1226   ierr = PetscFree(ct);CHKERRQ(ierr);
1227   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   if (matout) {
1230     *matout = B;
1231   } else {
1232     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1239 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1240 {
1241   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1242   Mat        a = aij->A,b = aij->B;
1243   int        ierr,s1,s2,s3;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1247   if (rr) {
1248     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1249     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1250     /* Overlap communication with computation. */
1251     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1252   }
1253   if (ll) {
1254     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1255     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1256     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1257   }
1258   /* scale  the diagonal block */
1259   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1260 
1261   if (rr) {
1262     /* Do a scatter end and then right scale the off-diagonal block */
1263     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1264     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1265   }
1266 
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1273 int MatPrintHelp_MPIAIJ(Mat A)
1274 {
1275   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1276   int        ierr;
1277 
1278   PetscFunctionBegin;
1279   if (!a->rank) {
1280     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1287 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1288 {
1289   PetscFunctionBegin;
1290   *bs = 1;
1291   PetscFunctionReturn(0);
1292 }
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1295 int MatSetUnfactored_MPIAIJ(Mat A)
1296 {
1297   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1298   int        ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatEqual_MPIAIJ"
1307 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1308 {
1309   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1310   Mat        a,b,c,d;
1311   PetscTruth flg;
1312   int        ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1316   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1317   a = matA->A; b = matA->B;
1318   c = matB->A; d = matB->B;
1319 
1320   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1321   if (flg == PETSC_TRUE) {
1322     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1323   }
1324   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatCopy_MPIAIJ"
1330 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1331 {
1332   int        ierr;
1333   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1334   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1335   PetscTruth flg;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1339   if (str != SAME_NONZERO_PATTERN || !flg) {
1340     /* because of the column compression in the off-processor part of the matrix a->B,
1341        the number of columns in a->B and b->B may be different; hence we cannot call
1342        MatCopy() directly on the two parts. If need be, we could provide a more
1343        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices and
1344        then copying the submatrices */
1345     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1348     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1355 int MatSetUpPreallocation_MPIAIJ(Mat A)
1356 {
1357   int        ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1365 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1366 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1367 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1368 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1369 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1370 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1371 #endif
1372 
1373 /* -------------------------------------------------------------------*/
1374 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1375        MatGetRow_MPIAIJ,
1376        MatRestoreRow_MPIAIJ,
1377        MatMult_MPIAIJ,
1378        MatMultAdd_MPIAIJ,
1379        MatMultTranspose_MPIAIJ,
1380        MatMultTransposeAdd_MPIAIJ,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        MatRelax_MPIAIJ,
1388        MatTranspose_MPIAIJ,
1389        MatGetInfo_MPIAIJ,
1390        MatEqual_MPIAIJ,
1391        MatGetDiagonal_MPIAIJ,
1392        MatDiagonalScale_MPIAIJ,
1393        MatNorm_MPIAIJ,
1394        MatAssemblyBegin_MPIAIJ,
1395        MatAssemblyEnd_MPIAIJ,
1396        0,
1397        MatSetOption_MPIAIJ,
1398        MatZeroEntries_MPIAIJ,
1399        MatZeroRows_MPIAIJ,
1400 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1401        MatLUFactorSymbolic_MPIAIJ_TFS,
1402 #else
1403        0,
1404 #endif
1405        0,
1406        0,
1407        0,
1408        MatSetUpPreallocation_MPIAIJ,
1409        0,
1410        0,
1411        0,
1412        0,
1413        MatDuplicate_MPIAIJ,
1414        0,
1415        0,
1416        0,
1417        0,
1418        0,
1419        MatGetSubMatrices_MPIAIJ,
1420        MatIncreaseOverlap_MPIAIJ,
1421        MatGetValues_MPIAIJ,
1422        MatCopy_MPIAIJ,
1423        MatPrintHelp_MPIAIJ,
1424        MatScale_MPIAIJ,
1425        0,
1426        0,
1427        0,
1428        MatGetBlockSize_MPIAIJ,
1429        0,
1430        0,
1431        0,
1432        0,
1433        MatFDColoringCreate_MPIAIJ,
1434        0,
1435        MatSetUnfactored_MPIAIJ,
1436        0,
1437        0,
1438        MatGetSubMatrix_MPIAIJ,
1439        MatDestroy_MPIAIJ,
1440        MatView_MPIAIJ,
1441        MatGetPetscMaps_Petsc,
1442        0,
1443        0,
1444        0,
1445        0,
1446        0,
1447        0,
1448        0,
1449        0,
1450        MatSetColoring_MPIAIJ,
1451        MatSetValuesAdic_MPIAIJ,
1452        MatSetValuesAdifor_MPIAIJ
1453 };
1454 
1455 /* ----------------------------------------------------------------------------------------*/
1456 
1457 EXTERN_C_BEGIN
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1460 int MatStoreValues_MPIAIJ(Mat mat)
1461 {
1462   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1463   int        ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1467   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 EXTERN_C_END
1471 
1472 EXTERN_C_BEGIN
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1475 int MatRetrieveValues_MPIAIJ(Mat mat)
1476 {
1477   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1478   int        ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1482   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 EXTERN_C_END
1486 
1487 #include "petscpc.h"
1488 EXTERN_C_BEGIN
1489 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1490 EXTERN_C_END
1491 
1492 EXTERN_C_BEGIN
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatCreate_MPIAIJ"
1495 int MatCreate_MPIAIJ(Mat B)
1496 {
1497   Mat_MPIAIJ   *b;
1498   int          ierr,i,size;
1499 #if defined(PETSC_HAVE_SUPERLUDIST)
1500   PetscTruth   flg;
1501 #endif
1502 
1503   PetscFunctionBegin;
1504   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1505 
1506   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1507   B->data         = (void*)b;
1508   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1509   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1510   B->factor       = 0;
1511   B->assembled    = PETSC_FALSE;
1512   B->mapping      = 0;
1513 
1514   B->insertmode      = NOT_SET_VALUES;
1515   b->size            = size;
1516   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1517 
1518   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1519   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1520 
1521   /* the ownership information in the maps duplicates the information computed below;
1522      eventually we should remove the duplicated fields and rely on the maps alone */
1523   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1524   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1525 
1526   /* build local table of row and column ownerships */
1527   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1528   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1529   b->cowners = b->rowners + b->size + 2;
1530   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
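  /* the gathered local row counts are converted into a prefix sum below, so that
     rows [rowners[rank],rowners[rank+1]) are the rows owned by this process */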
1531   b->rowners[0] = 0;
1532   for (i=2; i<=b->size; i++) {
1533     b->rowners[i] += b->rowners[i-1];
1534   }
1535   b->rstart = b->rowners[b->rank];
1536   b->rend   = b->rowners[b->rank+1];
1537   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1538   b->cowners[0] = 0;
1539   for (i=2; i<=b->size; i++) {
1540     b->cowners[i] += b->cowners[i-1];
1541   }
1542   b->cstart = b->cowners[b->rank];
1543   b->cend   = b->cowners[b->rank+1];
1544 
1545   /* build cache (stash) for off-process entries formed during assembly */
1546   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1547   b->donotstash  = PETSC_FALSE;
1548   b->colmap      = 0;
1549   b->garray      = 0;
1550   b->roworiented = PETSC_TRUE;
1551 
1552   /* stuff used for matrix vector multiply */
1553   b->lvec      = PETSC_NULL;
1554   b->Mvctx     = PETSC_NULL;
1555 
1556   /* stuff for MatGetRow() */
1557   b->rowindices   = 0;
1558   b->rowvalues    = 0;
1559   b->getrowactive = PETSC_FALSE;
1560 
1561 /* #if defined(PETSC_HAVE_SUPERLUDIST)
1562 
1563   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1564   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1565 #endif */
1566 
1567   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1568                                      "MatStoreValues_MPIAIJ",
1569                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1570   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1571                                      "MatRetrieveValues_MPIAIJ",
1572                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1573   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1574                                      "MatGetDiagonalBlock_MPIAIJ",
1575                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1576 
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1583 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1584 {
1585   Mat        mat;
1586   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1587   int        ierr;
1588 
1589   PetscFunctionBegin;
1590   *newmat       = 0;
1591   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1592   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1593   a    = (Mat_MPIAIJ*)mat->data;
1594   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1595   mat->factor       = matin->factor;
1596   mat->assembled    = PETSC_TRUE;
1597   mat->insertmode   = NOT_SET_VALUES;
1598   mat->preallocated = PETSC_TRUE;
1599 
1600   a->rstart       = oldmat->rstart;
1601   a->rend         = oldmat->rend;
1602   a->cstart       = oldmat->cstart;
1603   a->cend         = oldmat->cend;
1604   a->size         = oldmat->size;
1605   a->rank         = oldmat->rank;
1606   a->donotstash   = oldmat->donotstash;
1607   a->roworiented  = oldmat->roworiented;
1608   a->rowindices   = 0;
1609   a->rowvalues    = 0;
1610   a->getrowactive = PETSC_FALSE;
1611 
1612   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1613   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1614   if (oldmat->colmap) {
1615 #if defined (PETSC_USE_CTABLE)
1616     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1617 #else
1618     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1619     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1620     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1621 #endif
1622   } else a->colmap = 0;
1623   if (oldmat->garray) {
1624     int len;
1625     len  = oldmat->B->n;
1626     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1627     PetscLogObjectMemory(mat,len*sizeof(int));
1628     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1629   } else a->garray = 0;
1630 
1631   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1632   PetscLogObjectParent(mat,a->lvec);
1633   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1634   PetscLogObjectParent(mat,a->Mvctx);
1635   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1636   PetscLogObjectParent(mat,a->A);
1637   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1638   PetscLogObjectParent(mat,a->B);
1639   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1640   *newmat = mat;
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 #include "petscsys.h"
1645 
1646 EXTERN_C_BEGIN
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatLoad_MPIAIJ"
1649 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1650 {
1651   Mat          A;
1652   PetscScalar  *vals,*svals;
1653   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1654   MPI_Status   status;
1655   int          i,nz,ierr,j,rstart,rend,fd;
1656   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1657   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1658   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1659 
1660   PetscFunctionBegin;
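  /* process 0 reads the binary file and ships the row lengths, column indices, and values
     to the owning processes; each process then inserts its own rows with MatSetValues() */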
1661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1662   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1663   if (!rank) {
1664     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
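    /* the file begins with a header of four ints: the cookie, M, N, and nz
       (nz < 0 flags a special on-disk format that cannot be loaded as MPIAIJ) */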
1665     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1666     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1667     if (header[3] < 0) {
1668       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1669     }
1670   }
1671 
1672   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1673   M = header[1]; N = header[2];
1674   /* determine ownership of all rows */
1675   m = M/size + ((M % size) > rank);
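  /* split the rows as evenly as possible: every process gets M/size rows and the
     first M%size processes take one extra row */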
1676   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1677   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1678   rowners[0] = 0;
1679   for (i=2; i<=size; i++) {
1680     rowners[i] += rowners[i-1];
1681   }
1682   rstart = rowners[rank];
1683   rend   = rowners[rank+1];
1684 
1685   /* distribute row lengths to all processors */
1686   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1687   offlens = ourlens + (rend-rstart);
1688   if (!rank) {
1689     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1690     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1691     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1692     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1693     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1694     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1695   } else {
1696     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1697   }
1698 
1699   if (!rank) {
1700     /* calculate the number of nonzeros on each processor */
1701     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1702     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1703     for (i=0; i<size; i++) {
1704       for (j=rowners[i]; j< rowners[i+1]; j++) {
1705         procsnz[i] += rowlengths[j];
1706       }
1707     }
1708     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1709 
1710     /* determine max buffer needed and allocate it */
1711     maxnz = 0;
1712     for (i=0; i<size; i++) {
1713       maxnz = PetscMax(maxnz,procsnz[i]);
1714     }
1715     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1716 
1717     /* read in my part of the matrix column indices  */
1718     nz   = procsnz[0];
1719     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1720     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1721 
1722     /* read in everyone else's parts and ship them off */
1723     for (i=1; i<size; i++) {
1724       nz   = procsnz[i];
1725       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1726       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1727     }
1728     ierr = PetscFree(cols);CHKERRQ(ierr);
1729   } else {
1730     /* determine buffer space needed for message */
1731     nz = 0;
1732     for (i=0; i<m; i++) {
1733       nz += ourlens[i];
1734     }
1735     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1736 
1737     /* receive message of column indices */
1738     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1739     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1740     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1741   }
1742 
1743   /* determine column ownership if matrix is not square */
1744   if (N != M) {
1745     n      = N/size + ((N % size) > rank);
1746     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1747     cstart = cend - n;
1748   } else {
1749     cstart = rstart;
1750     cend   = rend;
1751     n      = cend - cstart;
1752   }
1753 
1754   /* loop over local rows, determining number of off-diagonal entries */
1755   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1756   jj = 0;
1757   for (i=0; i<m; i++) {
1758     for (j=0; j<ourlens[i]; j++) {
1759       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1760       jj++;
1761     }
1762   }
1763 
1764   /* create our matrix */
1765   for (i=0; i<m; i++) {
1766     ourlens[i] -= offlens[i];
1767   }
1768   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1769   A = *newmat;
1770   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1771   for (i=0; i<m; i++) {
1772     ourlens[i] += offlens[i];
1773   }
1774 
1775   if (!rank) {
1776     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1777 
1778     /* read in my part of the matrix numerical values  */
1779     nz   = procsnz[0];
1780     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1781 
1782     /* insert into matrix */
1783     jj      = rstart;
1784     smycols = mycols;
1785     svals   = vals;
1786     for (i=0; i<m; i++) {
1787       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1788       smycols += ourlens[i];
1789       svals   += ourlens[i];
1790       jj++;
1791     }
1792 
1793     /* read in the other processors' parts and ship them out */
1794     for (i=1; i<size; i++) {
1795       nz   = procsnz[i];
1796       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1797       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1798     }
1799     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1800   } else {
1801     /* receive numeric values */
1802     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1803 
1804     /* receive message of values */
1805     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1806     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1807     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1808 
1809     /* insert into matrix */
1810     jj      = rstart;
1811     smycols = mycols;
1812     svals   = vals;
1813     for (i=0; i<m; i++) {
1814       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1815       smycols += ourlens[i];
1816       svals   += ourlens[i];
1817       jj++;
1818     }
1819   }
1820   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1821   ierr = PetscFree(vals);CHKERRQ(ierr);
1822   ierr = PetscFree(mycols);CHKERRQ(ierr);
1823   ierr = PetscFree(rowners);CHKERRQ(ierr);
1824 
1825   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 EXTERN_C_END
1830 
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1833 /*
1834     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1835   copy, and then the final result obtained by concatenating the local matrices.
1836   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1837 */
1838 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1839 {
1840   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1841   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1842   Mat          *local,M,Mreuse;
1843   PetscScalar  *vwork,*aa;
1844   MPI_Comm     comm = mat->comm;
1845   Mat_SeqAIJ   *aij;
1846 
1847 
1848   PetscFunctionBegin;
1849   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1850   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1851 
1852   if (call ==  MAT_REUSE_MATRIX) {
1853     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1854     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1855     local = &Mreuse;
1856     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1857   } else {
1858     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1859     Mreuse = *local;
1860     ierr   = PetscFree(local);CHKERRQ(ierr);
1861   }
1862 
1863   /*
1864       m - number of local rows
1865       n - number of columns (same on all processors)
1866       rstart - first row in new global matrix generated
1867   */
1868   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1869   if (call == MAT_INITIAL_MATRIX) {
1870     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1871     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1872     ii  = aij->i;
1873     jj  = aij->j;
1874 
1875     /*
1876         Determine the number of non-zeros in the diagonal and off-diagonal
1877         portions of the matrix in order to do correct preallocation
1878     */
1879 
1880     /* first get start and end of "diagonal" columns */
1881     if (csize == PETSC_DECIDE) {
1882       nlocal = n/size + ((n % size) > rank);
1883     } else {
1884       nlocal = csize;
1885     }
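    /* an inclusive prefix sum of the local column counts gives the end of this
       process's new column range; its start follows by subtraction */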
1886     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1887     rstart = rend - nlocal;
1888     if (rank == size - 1 && rend != n) {
1889       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1890     }
1891 
1892     /* next, compute all the lengths */
1893     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1894     olens = dlens + m;
1895     for (i=0; i<m; i++) {
1896       jend = ii[i+1] - ii[i];
1897       olen = 0;
1898       dlen = 0;
1899       for (j=0; j<jend; j++) {
1900         if (*jj < rstart || *jj >= rend) olen++;
1901         else dlen++;
1902         jj++;
1903       }
1904       olens[i] = olen;
1905       dlens[i] = dlen;
1906     }
1907     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1908     ierr = PetscFree(dlens);CHKERRQ(ierr);
1909   } else {
1910     int ml,nl;
1911 
1912     M = *newmat;
1913     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1914     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1915     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1916     /*
1917          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1918        rather than the slower MatSetValues().
1919     */
1920     M->was_assembled = PETSC_TRUE;
1921     M->assembled     = PETSC_FALSE;
1922   }
1923   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1924   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1925   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1926   ii  = aij->i;
1927   jj  = aij->j;
1928   aa  = aij->a;
1929   for (i=0; i<m; i++) {
1930     row   = rstart + i;
1931     nz    = ii[i+1] - ii[i];
1932     cwork = jj;     jj += nz;
1933     vwork = aa;     aa += nz;
1934     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1935   }
1936 
1937   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1938   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1939   *newmat = M;
1940 
1941   /* save the submatrix used on this process for a later reuse request */
1942   if (call ==  MAT_INITIAL_MATRIX) {
1943     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1944     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1945   }
1946 
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1952 /*@C
1953    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
1954    (the default parallel PETSc format).  For good matrix assembly performance
1955    the user should preallocate the matrix storage by setting the parameters
1956    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1957    performance can be increased by more than a factor of 50.
1958 
1959    Collective on MPI_Comm
1960 
1961    Input Parameters:
1962 +  A - the matrix
1963 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1964            (same value is used for all local rows)
1965 .  d_nnz - array containing the number of nonzeros in the various rows of the
1966            DIAGONAL portion of the local submatrix (possibly different for each row)
1967            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1968            The size of this array is equal to the number of local rows, i.e. 'm'.
1969            You must leave room for the diagonal entry even if it is zero.
1970 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1971            submatrix (same value is used for all local rows).
1972 -  o_nnz - array containing the number of nonzeros in the various rows of the
1973            OFF-DIAGONAL portion of the local submatrix (possibly different for
1974            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1975            structure. The size of this array is equal to the number
1976            of local rows, i.e. 'm'.
1977 
1978    The AIJ format (also called the Yale sparse matrix format or
1979    compressed row storage), is fully compatible with standard Fortran 77
1980    storage.  That is, the stored row and column indices can begin at
1981    either one (as in Fortran) or zero.  See the users manual for details.
1982 
1983    The user MUST specify either the local or global matrix dimensions
1984    (possibly both).
1985 
1986    The parallel matrix is partitioned such that the first m0 rows belong to
1987    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1988    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
1989 
1990    The DIAGONAL portion of the local submatrix of a processor can be defined
1991    as the submatrix which is obtained by extracting the part corresponding
1992    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1993    first row that belongs to the processor, and r2 is the last row belonging
1994    to this processor. This is a square mxm matrix. The remaining portion of
1995    the local (mxN) submatrix constitutes the OFF-DIAGONAL portion.
1996 
1997    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1998 
1999    By default, this format uses inodes (identical nodes) when possible.
2000    We search for consecutive rows with the same nonzero structure, thereby
2001    reusing matrix information to achieve increased efficiency.
2002 
2003    Options Database Keys:
2004 +  -mat_aij_no_inode  - Do not use inodes
2005 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2006 -  -mat_aij_oneindex - Internally use indexing starting at 1
2007         rather than 0.  Note that when calling MatSetValues(),
2008         the user still MUST index entries starting at 0!
2009 
2010    Example usage:
2011 
2012    Consider the following 8x8 matrix with 34 non-zero values that is
2013    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2014    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2015    as follows:
2016 
2017 .vb
2018             1  2  0  |  0  3  0  |  0  4
2019     Proc0   0  5  6  |  7  0  0  |  8  0
2020             9  0 10  | 11  0  0  | 12  0
2021     -------------------------------------
2022            13  0 14  | 15 16 17  |  0  0
2023     Proc1   0 18  0  | 19 20 21  |  0  0
2024             0  0  0  | 22 23  0  | 24  0
2025     -------------------------------------
2026     Proc2  25 26 27  |  0  0 28  | 29  0
2027            30  0  0  | 31 32 33  |  0 34
2028 .ve
2029 
2030    This can be represented as a collection of submatrices as:
2031 
2032 .vb
2033       A B C
2034       D E F
2035       G H I
2036 .ve
2037 
2038    Where the submatrices A,B,C are owned by proc0, D,E,F are
2039    owned by proc1, G,H,I are owned by proc2.
2040 
2041    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2042    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2043    The 'M','N' parameters are 8,8, and have the same values on all procs.
2044 
2045    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2046    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2047    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2048    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2049    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2050    matrix, ans [DF] as another SeqAIJ matrix.
2051 
2052    When d_nz, o_nz parameters are specified, d_nz storage elements are
2053    allocated for every row of the local diagonal submatrix, and o_nz
2054    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2055    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2056    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2057    In this case, the values of d_nz,o_nz are:
2058 .vb
2059      proc0 : dnz = 2, o_nz = 2
2060      proc1 : dnz = 3, o_nz = 2
2061      proc2 : dnz = 1, o_nz = 4
2062 .ve
2063    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2064    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2065    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2066    34 values.
2067 
2068    When d_nnz, o_nnz parameters are specified, the storage is specified
2069    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2070    In the above case the values for d_nnz,o_nnz are:
2071 .vb
2072      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2073      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2074      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2075 .ve
2076    Here the space allocated is the sum of all the above values, i.e. 34, and
2077    hence pre-allocation is perfect.
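
   A minimal sketch of a typical call sequence (assuming the local sizes m,n and
   the d_nnz,o_nnz arrays above have already been computed; the creation calls
   may differ in your application):
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     /* ... MatSetValues() calls ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve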
2078 
2079    Level: intermediate
2080 
2081 .keywords: matrix, aij, compressed row, sparse, parallel
2082 
2083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2084 @*/
2085 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2086 {
2087   Mat_MPIAIJ   *b;
2088   int          ierr,i;
2089   PetscTruth   flg2;
2090 
2091   PetscFunctionBegin;
2092   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2093   if (!flg2) PetscFunctionReturn(0);
2094   B->preallocated = PETSC_TRUE;
2095   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2096   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2097   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2098   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2099   if (d_nnz) {
2100     for (i=0; i<B->m; i++) {
2101       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2102     }
2103   }
2104   if (o_nnz) {
2105     for (i=0; i<B->m; i++) {
2106       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2107     }
2108   }
2109   b = (Mat_MPIAIJ*)B->data;
2110 
2111   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2112   PetscLogObjectParent(B,b->A);
2113   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2114   PetscLogObjectParent(B,b->B);
2115 
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatCreateMPIAIJ"
2121 /*@C
2122    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2123    (the default parallel PETSc format).  For good matrix assembly performance
2124    the user should preallocate the matrix storage by setting the parameters
2125    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2126    performance can be increased by more than a factor of 50.
2127 
2128    Collective on MPI_Comm
2129 
2130    Input Parameters:
2131 +  comm - MPI communicator
2132 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2133            This value should be the same as the local size used in creating the
2134            y vector for the matrix-vector product y = Ax.
2135 .  n - This value should be the same as the local size used in creating the
2136        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2137        calculated if N is given) For square matrices n is almost always m.
2138 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2139 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2140 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2141            (same value is used for all local rows)
2142 .  d_nnz - array containing the number of nonzeros in the various rows of the
2143            DIAGONAL portion of the local submatrix (possibly different for each row)
2144            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2145            The size of this array is equal to the number of local rows, i.e. 'm'.
2146            You must leave room for the diagonal entry even if it is zero.
2147 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2148            submatrix (same value is used for all local rows).
2149 -  o_nnz - array containing the number of nonzeros in the various rows of the
2150            OFF-DIAGONAL portion of the local submatrix (possibly different for
2151            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2152            structure. The size of this array is equal to the number
2153            of local rows, i.e. 'm'.
2154 
2155    Output Parameter:
2156 .  A - the matrix
2157 
2158    Notes:
2159    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2160    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2161    storage requirements for this matrix.
2162 
2163    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2164    processor then it must be used on all processors that share the object for
2165    that argument.
2166 
2167    The AIJ format (also called the Yale sparse matrix format or
2168    compressed row storage), is fully compatible with standard Fortran 77
2169    storage.  That is, the stored row and column indices can begin at
2170    either one (as in Fortran) or zero.  See the users manual for details.
2171 
2172    The user MUST specify either the local or global matrix dimensions
2173    (possibly both).
2174 
2175    The parallel matrix is partitioned such that the first m0 rows belong to
2176    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2177    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2178 
2179    The DIAGONAL portion of the local submatrix of a processor can be defined
2180    as the submatrix which is obtained by extracting the part corresponding
2181    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2182    first row that belongs to the processor, and r2 is the last row belonging
2183    to this processor. This is a square mxm matrix. The remaining portion of
2184    the local (mxN) submatrix constitutes the OFF-DIAGONAL portion.
2185 
2186    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2187 
2188    By default, this format uses inodes (identical nodes) when possible.
2189    We search for consecutive rows with the same nonzero structure, thereby
2190    reusing matrix information to achieve increased efficiency.
2191 
2192    Options Database Keys:
2193 +  -mat_aij_no_inode  - Do not use inodes
2194 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2195 -  -mat_aij_oneindex - Internally use indexing starting at 1
2196         rather than 0.  Note that when calling MatSetValues(),
2197         the user still MUST index entries starting at 0!
2198 
2199 
2200    Example usage:
2201 
2202    Consider the following 8x8 matrix with 34 non-zero values that is
2203    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2204    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2205    as follows:
2206 
2207 .vb
2208             1  2  0  |  0  3  0  |  0  4
2209     Proc0   0  5  6  |  7  0  0  |  8  0
2210             9  0 10  | 11  0  0  | 12  0
2211     -------------------------------------
2212            13  0 14  | 15 16 17  |  0  0
2213     Proc1   0 18  0  | 19 20 21  |  0  0
2214             0  0  0  | 22 23  0  | 24  0
2215     -------------------------------------
2216     Proc2  25 26 27  |  0  0 28  | 29  0
2217            30  0  0  | 31 32 33  |  0 34
2218 .ve
2219 
2220    This can be represented as a collection of submatrices as:
2221 
2222 .vb
2223       A B C
2224       D E F
2225       G H I
2226 .ve
2227 
2228    Where the submatrices A,B,C are owned by proc0, D,E,F are
2229    owned by proc1, G,H,I are owned by proc2.
2230 
2231    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2232    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2233    The 'M','N' parameters are 8,8, and have the same values on all procs.
2234 
2235    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2236    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2237    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2238    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2239    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2240    matrix, and [DF] as another SeqAIJ matrix.
2241 
2242    When d_nz, o_nz parameters are specified, d_nz storage elements are
2243    allocated for every row of the local diagonal submatrix, and o_nz
2244    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2245    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2246    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2247    In this case, the values of d_nz,o_nz are:
2248 .vb
2249      proc0 : dnz = 2, o_nz = 2
2250      proc1 : dnz = 3, o_nz = 2
2251      proc2 : dnz = 1, o_nz = 4
2252 .ve
2253    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2254    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2255    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2256    34 values.
2257 
2258    When d_nnz, o_nnz parameters are specified, the storage is specified
2259    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2260    In the above case the values for d_nnz,o_nnz are:
2261 .vb
2262      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2263      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2264      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2265 .ve
2266    Here the space allocated is the sum of all the above values, i.e. 34, and
2267    hence pre-allocation is perfect.
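
   A minimal sketch of creating the matrix from the example above in one call
   (assuming d_nnz and o_nnz hold the per-row counts listed above, and m,n are
   3,3,2 on proc0,proc1,proc2; the names are illustrative):
.vb
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve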
2268 
2269    Level: intermediate
2270 
2271 .keywords: matrix, aij, compressed row, sparse, parallel
2272 
2273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2274 @*/
2275 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2276 {
2277   int ierr,size;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2282   if (size > 1) {
2283     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2284     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2285   } else {
2286     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2287     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2288   }
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
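/*
    MatMPIAIJGetSeqAIJ - gives access to the two sequential pieces of an MPIAIJ matrix:
  Ad is the "diagonal" block (local rows by local columns), Ao is the "off-diagonal"
  block (local rows by the compressed set of nonlocal columns), and colmap is the
  garray that maps Ao's local column numbers to global column numbers.
*/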
2294 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2295 {
2296   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2297   PetscFunctionBegin;
2298   *Ad     = a->A;
2299   *Ao     = a->B;
2300   *colmap = a->garray;
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2306 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2307 {
2308   int        ierr;
2309   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2310 
2311   PetscFunctionBegin;
2312   if (coloring->ctype == IS_COLORING_LOCAL) {
2313     int        *allcolors,*colors,i;
2314     ISColoring ocoloring;
2315 
2316     /* set coloring for diagonal portion */
2317     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2318 
2319     /* set coloring for off-diagonal portion */
2320     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2321     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2322     for (i=0; i<a->B->n; i++) {
2323       colors[i] = allcolors[a->garray[i]];
2324     }
2325     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2326     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2327     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2328     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2329   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2330     int        *colors,i,*larray;
2331     ISColoring ocoloring;
2332 
2333     /* set coloring for diagonal portion */
2334     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2335     for (i=0; i<a->A->n; i++) {
2336       larray[i] = i + a->cstart;
2337     }
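    /* a ghosted coloring is indexed by the local (ghosted) numbering, so translate the
       global column numbers to local indices before looking up the colors */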
2338     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2339     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2340     for (i=0; i<a->A->n; i++) {
2341       colors[i] = coloring->colors[larray[i]];
2342     }
2343     ierr = PetscFree(larray);CHKERRQ(ierr);
2344     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2345     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2346     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2347 
2348     /* set coloring for off-diagonal portion */
2349     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2350     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2351     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2352     for (i=0; i<a->B->n; i++) {
2353       colors[i] = coloring->colors[larray[i]];
2354     }
2355     ierr = PetscFree(larray);CHKERRQ(ierr);
2356     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2357     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2358     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2359   } else {
2360     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2361   }
2362 
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2368 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2369 {
2370   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2371   int        ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2375   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2381 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2382 {
2383   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2384   int        ierr;
2385 
2386   PetscFunctionBegin;
2387   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2388   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391