xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8f968ba35c4541eaec8a8885f38b21f52fa2d3e6)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
19 a slightly higher hash table cost; without it it is not scalable (each processor
20 has an order N integer array but is fast to acess.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col --;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are either in INSERTMODE or ADDMODE */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled, if so we must
385      also disassemble ourselfs, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the data into the send scatter */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal we use seperate
540      code so that MatSetValues() is not called for each diagonal allocating
541      new memory, thus calling lots of mallocs and slowing things down.
542 
543        Contributed by: Mathew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* inserted in yy until the next line, which is true for my implementation*/
629   /* but is not perhaps always true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually */
649   /* inserted in yy until the next line, which is true for my implementation*/
650   /* but is not perhaps always true. */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the subblock A->A is the
657    diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
722   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
723   PetscTruth        isdraw,isascii,flg;
724   PetscViewer       sviewer;
725   PetscViewerFormat format;
726 
727   PetscFunctionBegin;
728   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
730   if (isascii) {
731     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
732     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
733       MatInfo info;
734       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
735       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
737       if (flg) {
738         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
739 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
740       } else {
741         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
742 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
743       }
744       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
749       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
750       PetscFunctionReturn(0);
751     } else if (format == PETSC_VIEWER_ASCII_INFO) {
752       PetscFunctionReturn(0);
753     }
754   } else if (isdraw) {
755     PetscDraw       draw;
756     PetscTruth isnull;
757     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
758     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
759   }
760 
761   if (size == 1) {
762     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
763     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
764   } else {
765     /* assemble the entire matrix onto first processor. */
766     Mat         A;
767     Mat_SeqAIJ *Aloc;
768     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
769     PetscScalar *a;
770 
771     if (!rank) {
772       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     } else {
774       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
775     }
776     PetscLogObjectParent(mat,A);
777 
778     /* copy over the A part */
779     Aloc = (Mat_SeqAIJ*)aij->A->data;
780     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781     row = aij->rstart;
782     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
783     for (i=0; i<m; i++) {
784       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
785       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
786     }
787     aj = Aloc->j;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
789 
790     /* copy over the B part */
791     Aloc = (Mat_SeqAIJ*)aij->B->data;
792     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
793     row  = aij->rstart;
794     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
795     ct   = cols;
796     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
797     for (i=0; i<m; i++) {
798       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800     }
801     ierr = PetscFree(ct);CHKERRQ(ierr);
802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     /*
805        Everyone has to call to draw the matrix since the graphics waits are
806        synchronized across all processors that share the PetscDraw object
807     */
808     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
809     if (!rank) {
810       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
811       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
812     }
813     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     ierr = MatDestroy(A);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatView_MPIAIJ"
821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
822 {
823   int        ierr;
824   PetscTruth isascii,isdraw,issocket,isbinary;
825 
826   PetscFunctionBegin;
827   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
828   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
829   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
830   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
831   if (isascii || isdraw || isbinary || issocket) {
832     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
833   } else {
834     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatRelax_MPIAIJ"
843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
844 {
845   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
846   int          ierr;
847   Vec          bb1;
848   PetscScalar  mone=-1.0;
849 
850   PetscFunctionBegin;
851   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
852 
853   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
854 
855   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
856     if (flag & SOR_ZERO_INITIAL_GUESS) {
857       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
858       its--;
859     }
860 
861     while (its--) {
862       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
863       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
864 
865       /* update rhs: bb1 = bb - B*x */
866       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
867       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
868 
869       /* local sweep */
870       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
871       CHKERRQ(ierr);
872     }
873   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
876       its--;
877     }
878     while (its--) {
879       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881 
882       /* update rhs: bb1 = bb - B*x */
883       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
884       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
885 
886       /* local sweep */
887       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888       CHKERRQ(ierr);
889     }
890   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
893       its--;
894     }
895     while (its--) {
896       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898 
899       /* update rhs: bb1 = bb - B*x */
900       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
901       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
902 
903       /* local sweep */
904       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
905       CHKERRQ(ierr);
906     }
907   } else {
908     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
909   }
910 
911   ierr = VecDestroy(bb1);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatGetInfo_MPIAIJ"
917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
918 {
919   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
920   Mat        A = mat->A,B = mat->B;
921   int        ierr;
922   PetscReal  isend[5],irecv[5];
923 
924   PetscFunctionBegin;
925   info->block_size     = 1.0;
926   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
927   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
928   isend[3] = info->memory;  isend[4] = info->mallocs;
929   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
930   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
931   isend[3] += info->memory;  isend[4] += info->mallocs;
932   if (flag == MAT_LOCAL) {
933     info->nz_used      = isend[0];
934     info->nz_allocated = isend[1];
935     info->nz_unneeded  = isend[2];
936     info->memory       = isend[3];
937     info->mallocs      = isend[4];
938   } else if (flag == MAT_GLOBAL_MAX) {
939     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
940     info->nz_used      = irecv[0];
941     info->nz_allocated = irecv[1];
942     info->nz_unneeded  = irecv[2];
943     info->memory       = irecv[3];
944     info->mallocs      = irecv[4];
945   } else if (flag == MAT_GLOBAL_SUM) {
946     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
947     info->nz_used      = irecv[0];
948     info->nz_allocated = irecv[1];
949     info->nz_unneeded  = irecv[2];
950     info->memory       = irecv[3];
951     info->mallocs      = irecv[4];
952   }
953   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
954   info->fill_ratio_needed = 0;
955   info->factor_mallocs    = 0;
956   info->rows_global       = (double)matin->M;
957   info->columns_global    = (double)matin->N;
958   info->rows_local        = (double)matin->m;
959   info->columns_local     = (double)matin->N;
960 
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatSetOption_MPIAIJ"
966 int MatSetOption_MPIAIJ(Mat A,MatOption op)
967 {
968   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
969   int        ierr;
970 
971   PetscFunctionBegin;
972   switch (op) {
973   case MAT_NO_NEW_NONZERO_LOCATIONS:
974   case MAT_YES_NEW_NONZERO_LOCATIONS:
975   case MAT_COLUMNS_UNSORTED:
976   case MAT_COLUMNS_SORTED:
977   case MAT_NEW_NONZERO_ALLOCATION_ERR:
978   case MAT_KEEP_ZEROED_ROWS:
979   case MAT_NEW_NONZERO_LOCATION_ERR:
980   case MAT_USE_INODES:
981   case MAT_DO_NOT_USE_INODES:
982   case MAT_IGNORE_ZERO_ENTRIES:
983     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
984     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
985     break;
986   case MAT_ROW_ORIENTED:
987     a->roworiented = PETSC_TRUE;
988     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
989     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
990     break;
991   case MAT_ROWS_SORTED:
992   case MAT_ROWS_UNSORTED:
993   case MAT_YES_NEW_DIAGONALS:
994   case MAT_USE_SINGLE_PRECISION_SOLVES:
995     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
996     break;
997   case MAT_COLUMN_ORIENTED:
998     a->roworiented = PETSC_FALSE;
999     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1000     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1001     break;
1002   case MAT_IGNORE_OFF_PROC_ENTRIES:
1003     a->donotstash = PETSC_TRUE;
1004     break;
1005   case MAT_NO_NEW_DIAGONALS:
1006     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1007     break;
1008   default:
1009     SETERRQ(PETSC_ERR_SUP,"unknown option");
1010     break;
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPIAIJ"
1017 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1018 {
1019   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1021   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1022   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1023   int          *cmap,*idx_p;
1024 
1025   PetscFunctionBegin;
1026   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1027   mat->getrowactive = PETSC_TRUE;
1028 
1029   if (!mat->rowvalues && (idx || v)) {
1030     /*
1031         allocate enough space to hold information from the longest row.
1032     */
1033     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1034     int     max = 1,tmp;
1035     for (i=0; i<matin->m; i++) {
1036       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1037       if (max < tmp) { max = tmp; }
1038     }
1039     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1040     mat->rowindices = (int*)(mat->rowvalues + max);
1041   }
1042 
1043   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1044   lrow = row - rstart;
1045 
1046   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1047   if (!v)   {pvA = 0; pvB = 0;}
1048   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1049   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1050   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1051   nztot = nzA + nzB;
1052 
1053   cmap  = mat->garray;
1054   if (v  || idx) {
1055     if (nztot) {
1056       /* Sort by increasing column numbers, assuming A and B already sorted */
1057       int imark = -1;
1058       if (v) {
1059         *v = v_p = mat->rowvalues;
1060         for (i=0; i<nzB; i++) {
1061           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1062           else break;
1063         }
1064         imark = i;
1065         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1066         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1067       }
1068       if (idx) {
1069         *idx = idx_p = mat->rowindices;
1070         if (imark > -1) {
1071           for (i=0; i<imark; i++) {
1072             idx_p[i] = cmap[cworkB[i]];
1073           }
1074         } else {
1075           for (i=0; i<nzB; i++) {
1076             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1077             else break;
1078           }
1079           imark = i;
1080         }
1081         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1082         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1083       }
1084     } else {
1085       if (idx) *idx = 0;
1086       if (v)   *v   = 0;
1087     }
1088   }
1089   *nz = nztot;
1090   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1091   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1097 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1098 {
1099   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1100 
1101   PetscFunctionBegin;
1102   if (aij->getrowactive == PETSC_FALSE) {
1103     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1104   }
1105   aij->getrowactive = PETSC_FALSE;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatNorm_MPIAIJ"
1111 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1112 {
1113   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1114   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1115   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1116   PetscReal    sum = 0.0;
1117   PetscScalar  *v;
1118 
1119   PetscFunctionBegin;
1120   if (aij->size == 1) {
1121     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1122   } else {
1123     if (type == NORM_FROBENIUS) {
1124       v = amat->a;
1125       for (i=0; i<amat->nz; i++) {
1126 #if defined(PETSC_USE_COMPLEX)
1127         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1128 #else
1129         sum += (*v)*(*v); v++;
1130 #endif
1131       }
1132       v = bmat->a;
1133       for (i=0; i<bmat->nz; i++) {
1134 #if defined(PETSC_USE_COMPLEX)
1135         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1136 #else
1137         sum += (*v)*(*v); v++;
1138 #endif
1139       }
1140       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1141       *norm = sqrt(*norm);
1142     } else if (type == NORM_1) { /* max column norm */
1143       PetscReal *tmp,*tmp2;
1144       int    *jj,*garray = aij->garray;
1145       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1146       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1147       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1148       *norm = 0.0;
1149       v = amat->a; jj = amat->j;
1150       for (j=0; j<amat->nz; j++) {
1151         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1152       }
1153       v = bmat->a; jj = bmat->j;
1154       for (j=0; j<bmat->nz; j++) {
1155         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1156       }
1157       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1158       for (j=0; j<mat->N; j++) {
1159         if (tmp2[j] > *norm) *norm = tmp2[j];
1160       }
1161       ierr = PetscFree(tmp);CHKERRQ(ierr);
1162       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1163     } else if (type == NORM_INFINITY) { /* max row norm */
1164       PetscReal ntemp = 0.0;
1165       for (j=0; j<aij->A->m; j++) {
1166         v = amat->a + amat->i[j] + shift;
1167         sum = 0.0;
1168         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1169           sum += PetscAbsScalar(*v); v++;
1170         }
1171         v = bmat->a + bmat->i[j] + shift;
1172         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1173           sum += PetscAbsScalar(*v); v++;
1174         }
1175         if (sum > ntemp) ntemp = sum;
1176       }
1177       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1178     } else {
1179       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1180     }
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPIAIJ"
1187 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1188 {
1189   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1190   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1191   int          ierr,shift = Aloc->indexshift;
1192   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1193   Mat          B;
1194   PetscScalar  *array;
1195 
1196   PetscFunctionBegin;
1197   if (!matout && M != N) {
1198     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1199   }
1200 
1201   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1202 
1203   /* copy over the A part */
1204   Aloc = (Mat_SeqAIJ*)a->A->data;
1205   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1206   row = a->rstart;
1207   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1208   for (i=0; i<m; i++) {
1209     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1210     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1211   }
1212   aj = Aloc->j;
1213   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1214 
1215   /* copy over the B part */
1216   Aloc = (Mat_SeqAIJ*)a->B->data;
1217   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1218   row  = a->rstart;
1219   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1220   ct   = cols;
1221   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1222   for (i=0; i<m; i++) {
1223     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1224     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1225   }
1226   ierr = PetscFree(ct);CHKERRQ(ierr);
1227   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   if (matout) {
1230     *matout = B;
1231   } else {
1232     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1239 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1240 {
1241   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1242   Mat        a = aij->A,b = aij->B;
1243   int        ierr,s1,s2,s3;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1247   if (rr) {
1248     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1249     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1250     /* Overlap communication with computation. */
1251     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1252   }
1253   if (ll) {
1254     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1255     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1256     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1257   }
1258   /* scale  the diagonal block */
1259   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1260 
1261   if (rr) {
1262     /* Do a scatter end and then right scale the off-diagonal block */
1263     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1264     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1265   }
1266 
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1273 int MatPrintHelp_MPIAIJ(Mat A)
1274 {
1275   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1276   int        ierr;
1277 
1278   PetscFunctionBegin;
1279   if (!a->rank) {
1280     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1287 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1288 {
1289   PetscFunctionBegin;
1290   *bs = 1;
1291   PetscFunctionReturn(0);
1292 }
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1295 int MatSetUnfactored_MPIAIJ(Mat A)
1296 {
1297   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1298   int        ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatEqual_MPIAIJ"
1307 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1308 {
1309   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1310   Mat        a,b,c,d;
1311   PetscTruth flg;
1312   int        ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1316   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1317   a = matA->A; b = matA->B;
1318   c = matB->A; d = matB->B;
1319 
1320   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1321   if (flg == PETSC_TRUE) {
1322     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1323   }
1324   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatCopy_MPIAIJ"
1330 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1331 {
1332   int        ierr;
1333   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1334   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1335   PetscTruth flg;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1339   if (str != SAME_NONZERO_PATTERN || !flg) {
1340     /* because of the column compression in the off-processor part of the matrix a->B,
1341        the number of columns in a->B and b->B may be different, hence we cannot call
1342        the MatCopy() directly on the two parts. If need be, we can provide a more
1343        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1344        then copying the submatrices */
1345     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1348     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1355 int MatSetUpPreallocation_MPIAIJ(Mat A)
1356 {
1357   int        ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1365 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1366 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1367 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1368 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1369 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1370 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1371 #endif
1372 
1373 /* -------------------------------------------------------------------*/
1374 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1375        MatGetRow_MPIAIJ,
1376        MatRestoreRow_MPIAIJ,
1377        MatMult_MPIAIJ,
1378        MatMultAdd_MPIAIJ,
1379        MatMultTranspose_MPIAIJ,
1380        MatMultTransposeAdd_MPIAIJ,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        MatRelax_MPIAIJ,
1388        MatTranspose_MPIAIJ,
1389        MatGetInfo_MPIAIJ,
1390        MatEqual_MPIAIJ,
1391        MatGetDiagonal_MPIAIJ,
1392        MatDiagonalScale_MPIAIJ,
1393        MatNorm_MPIAIJ,
1394        MatAssemblyBegin_MPIAIJ,
1395        MatAssemblyEnd_MPIAIJ,
1396        0,
1397        MatSetOption_MPIAIJ,
1398        MatZeroEntries_MPIAIJ,
1399        MatZeroRows_MPIAIJ,
1400 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1401        MatLUFactorSymbolic_MPIAIJ_TFS,
1402 #else
1403        0,
1404 #endif
1405        0,
1406        0,
1407        0,
1408        MatSetUpPreallocation_MPIAIJ,
1409        0,
1410        0,
1411        0,
1412        0,
1413        MatDuplicate_MPIAIJ,
1414        0,
1415        0,
1416        0,
1417        0,
1418        0,
1419        MatGetSubMatrices_MPIAIJ,
1420        MatIncreaseOverlap_MPIAIJ,
1421        MatGetValues_MPIAIJ,
1422        MatCopy_MPIAIJ,
1423        MatPrintHelp_MPIAIJ,
1424        MatScale_MPIAIJ,
1425        0,
1426        0,
1427        0,
1428        MatGetBlockSize_MPIAIJ,
1429        0,
1430        0,
1431        0,
1432        0,
1433        MatFDColoringCreate_MPIAIJ,
1434        0,
1435        MatSetUnfactored_MPIAIJ,
1436        0,
1437        0,
1438        MatGetSubMatrix_MPIAIJ,
1439        MatDestroy_MPIAIJ,
1440        MatView_MPIAIJ,
1441        MatGetPetscMaps_Petsc,
1442        0,
1443        0,
1444        0,
1445        0,
1446        0,
1447        0,
1448        0,
1449        0,
1450        MatSetColoring_MPIAIJ,
1451        MatSetValuesAdic_MPIAIJ,
1452        MatSetValuesAdifor_MPIAIJ
1453 };
1454 
1455 /* ----------------------------------------------------------------------------------------*/
1456 
1457 EXTERN_C_BEGIN
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1460 int MatStoreValues_MPIAIJ(Mat mat)
1461 {
1462   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1463   int        ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1467   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 EXTERN_C_END
1471 
1472 EXTERN_C_BEGIN
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1475 int MatRetrieveValues_MPIAIJ(Mat mat)
1476 {
1477   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1478   int        ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1482   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 EXTERN_C_END
1486 
1487 #include "petscpc.h"
1488 EXTERN_C_BEGIN
1489 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1490 EXTERN_C_END
1491 
1492 EXTERN_C_BEGIN
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatCreate_MPIAIJ"
1495 int MatCreate_MPIAIJ(Mat B)
1496 {
1497   Mat_MPIAIJ *b;
1498   int        ierr,i,size;
1499 
1500   PetscFunctionBegin;
1501   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1502 
1503   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1504   B->data         = (void*)b;
1505   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1506   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1507   B->factor       = 0;
1508   B->assembled    = PETSC_FALSE;
1509   B->mapping      = 0;
1510 
1511   B->insertmode      = NOT_SET_VALUES;
1512   b->size            = size;
1513   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1514 
1515   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1516   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1517 
1518   /* the information in the maps duplicates the information computed below, eventually
1519      we should remove the duplicate information that is not contained in the maps */
1520   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1521   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1522 
1523   /* build local table of row and column ownerships */
1524   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1525   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1526   b->cowners = b->rowners + b->size + 2;
1527   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1528   b->rowners[0] = 0;
1529   for (i=2; i<=b->size; i++) {
1530     b->rowners[i] += b->rowners[i-1];
1531   }
1532   b->rstart = b->rowners[b->rank];
1533   b->rend   = b->rowners[b->rank+1];
1534   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1535   b->cowners[0] = 0;
1536   for (i=2; i<=b->size; i++) {
1537     b->cowners[i] += b->cowners[i-1];
1538   }
1539   b->cstart = b->cowners[b->rank];
1540   b->cend   = b->cowners[b->rank+1];
1541 
1542   /* build cache for off array entries formed */
1543   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1544   b->donotstash  = PETSC_FALSE;
1545   b->colmap      = 0;
1546   b->garray      = 0;
1547   b->roworiented = PETSC_TRUE;
1548 
1549   /* stuff used for matrix vector multiply */
1550   b->lvec      = PETSC_NULL;
1551   b->Mvctx     = PETSC_NULL;
1552 
1553   /* stuff for MatGetRow() */
1554   b->rowindices   = 0;
1555   b->rowvalues    = 0;
1556   b->getrowactive = PETSC_FALSE;
1557 
1558 /* #if defined(PETSC_HAVE_SUPERLUDIST)
1559 
1560   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1561   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1562 #endif */
1563 
1564   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1565                                      "MatStoreValues_MPIAIJ",
1566                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1567   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1568                                      "MatRetrieveValues_MPIAIJ",
1569                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1570   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1571                                      "MatGetDiagonalBlock_MPIAIJ",
1572                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1573 
1574   PetscFunctionReturn(0);
1575 }
1576 EXTERN_C_END
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1580 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1581 {
1582   Mat        mat;
1583   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1584   int        ierr;
1585 
1586   PetscFunctionBegin;
1587   *newmat       = 0;
1588   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1589   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1590   a    = (Mat_MPIAIJ*)mat->data;
1591   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1592   mat->factor       = matin->factor;
1593   mat->assembled    = PETSC_TRUE;
1594   mat->insertmode   = NOT_SET_VALUES;
1595   mat->preallocated = PETSC_TRUE;
1596 
1597   a->rstart       = oldmat->rstart;
1598   a->rend         = oldmat->rend;
1599   a->cstart       = oldmat->cstart;
1600   a->cend         = oldmat->cend;
1601   a->size         = oldmat->size;
1602   a->rank         = oldmat->rank;
1603   a->donotstash   = oldmat->donotstash;
1604   a->roworiented  = oldmat->roworiented;
1605   a->rowindices   = 0;
1606   a->rowvalues    = 0;
1607   a->getrowactive = PETSC_FALSE;
1608 
1609   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1610   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1611   if (oldmat->colmap) {
1612 #if defined (PETSC_USE_CTABLE)
1613     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1614 #else
1615     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1616     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1617     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1618 #endif
1619   } else a->colmap = 0;
1620   if (oldmat->garray) {
1621     int len;
1622     len  = oldmat->B->n;
1623     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1624     PetscLogObjectMemory(mat,len*sizeof(int));
1625     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1626   } else a->garray = 0;
1627 
1628   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1629   PetscLogObjectParent(mat,a->lvec);
1630   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1631   PetscLogObjectParent(mat,a->Mvctx);
1632   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1633   PetscLogObjectParent(mat,a->A);
1634   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1635   PetscLogObjectParent(mat,a->B);
1636   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1637   *newmat = mat;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #include "petscsys.h"
1642 
1643 EXTERN_C_BEGIN
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatLoad_MPIAIJ"
1646 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1647 {
1648   Mat          A;
1649   PetscScalar  *vals,*svals;
1650   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1651   MPI_Status   status;
1652   int          i,nz,ierr,j,rstart,rend,fd;
1653   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1654   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1655   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1656 
1657   PetscFunctionBegin;
1658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1659   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1660   if (!rank) {
1661     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1662     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1663     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1664     if (header[3] < 0) {
1665       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1666     }
1667   }
1668 
1669   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1670   M = header[1]; N = header[2];
1671   /* determine ownership of all rows */
1672   m = M/size + ((M % size) > rank);
1673   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1674   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1675   rowners[0] = 0;
1676   for (i=2; i<=size; i++) {
1677     rowners[i] += rowners[i-1];
1678   }
1679   rstart = rowners[rank];
1680   rend   = rowners[rank+1];
1681 
1682   /* distribute row lengths to all processors */
1683   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1684   offlens = ourlens + (rend-rstart);
1685   if (!rank) {
1686     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1687     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1688     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1689     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1690     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1691     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1692   } else {
1693     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1694   }
1695 
1696   if (!rank) {
1697     /* calculate the number of nonzeros on each processor */
1698     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1699     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1700     for (i=0; i<size; i++) {
1701       for (j=rowners[i]; j< rowners[i+1]; j++) {
1702         procsnz[i] += rowlengths[j];
1703       }
1704     }
1705     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1706 
1707     /* determine max buffer needed and allocate it */
1708     maxnz = 0;
1709     for (i=0; i<size; i++) {
1710       maxnz = PetscMax(maxnz,procsnz[i]);
1711     }
1712     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1713 
1714     /* read in my part of the matrix column indices  */
1715     nz   = procsnz[0];
1716     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1717     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1718 
1719     /* read in every one elses and ship off */
1720     for (i=1; i<size; i++) {
1721       nz   = procsnz[i];
1722       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1723       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1724     }
1725     ierr = PetscFree(cols);CHKERRQ(ierr);
1726   } else {
1727     /* determine buffer space needed for message */
1728     nz = 0;
1729     for (i=0; i<m; i++) {
1730       nz += ourlens[i];
1731     }
1732     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1733 
1734     /* receive message of column indices*/
1735     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1736     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1737     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1738   }
1739 
1740   /* determine column ownership if matrix is not square */
1741   if (N != M) {
1742     n      = N/size + ((N % size) > rank);
1743     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1744     cstart = cend - n;
1745   } else {
1746     cstart = rstart;
1747     cend   = rend;
1748     n      = cend - cstart;
1749   }
1750 
1751   /* loop over local rows, determining number of off diagonal entries */
1752   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1753   jj = 0;
1754   for (i=0; i<m; i++) {
1755     for (j=0; j<ourlens[i]; j++) {
1756       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1757       jj++;
1758     }
1759   }
1760 
1761   /* create our matrix */
1762   for (i=0; i<m; i++) {
1763     ourlens[i] -= offlens[i];
1764   }
1765   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1766   A = *newmat;
1767   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1768   for (i=0; i<m; i++) {
1769     ourlens[i] += offlens[i];
1770   }
1771 
1772   if (!rank) {
1773     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1774 
1775     /* read in my part of the matrix numerical values  */
1776     nz   = procsnz[0];
1777     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1778 
1779     /* insert into matrix */
1780     jj      = rstart;
1781     smycols = mycols;
1782     svals   = vals;
1783     for (i=0; i<m; i++) {
1784       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1785       smycols += ourlens[i];
1786       svals   += ourlens[i];
1787       jj++;
1788     }
1789 
1790     /* read in other processors and ship out */
1791     for (i=1; i<size; i++) {
1792       nz   = procsnz[i];
1793       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1794       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1795     }
1796     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1797   } else {
1798     /* receive numeric values */
1799     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1800 
1801     /* receive message of values*/
1802     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1803     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1804     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1805 
1806     /* insert into matrix */
1807     jj      = rstart;
1808     smycols = mycols;
1809     svals   = vals;
1810     for (i=0; i<m; i++) {
1811       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1812       smycols += ourlens[i];
1813       svals   += ourlens[i];
1814       jj++;
1815     }
1816   }
1817   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1818   ierr = PetscFree(vals);CHKERRQ(ierr);
1819   ierr = PetscFree(mycols);CHKERRQ(ierr);
1820   ierr = PetscFree(rowners);CHKERRQ(ierr);
1821 
1822   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1823   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 EXTERN_C_END
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1830 /*
1831     Not great since it makes two copies of the submatrix, first an SeqAIJ
1832   in local and then by concatenating the local matrices the end result.
1833   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1834 */
1835 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1836 {
1837   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1838   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1839   Mat          *local,M,Mreuse;
1840   PetscScalar  *vwork,*aa;
1841   MPI_Comm     comm = mat->comm;
1842   Mat_SeqAIJ   *aij;
1843 
1844 
1845   PetscFunctionBegin;
1846   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1847   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1848 
1849   if (call ==  MAT_REUSE_MATRIX) {
1850     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1851     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1852     local = &Mreuse;
1853     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1854   } else {
1855     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1856     Mreuse = *local;
1857     ierr   = PetscFree(local);CHKERRQ(ierr);
1858   }
1859 
1860   /*
1861       m - number of local rows
1862       n - number of columns (same on all processors)
1863       rstart - first row in new global matrix generated
1864   */
1865   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1866   if (call == MAT_INITIAL_MATRIX) {
1867     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1868     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1869     ii  = aij->i;
1870     jj  = aij->j;
1871 
1872     /*
1873         Determine the number of non-zeros in the diagonal and off-diagonal
1874         portions of the matrix in order to do correct preallocation
1875     */
1876 
1877     /* first get start and end of "diagonal" columns */
1878     if (csize == PETSC_DECIDE) {
1879       nlocal = n/size + ((n % size) > rank);
1880     } else {
1881       nlocal = csize;
1882     }
1883     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1884     rstart = rend - nlocal;
1885     if (rank == size - 1 && rend != n) {
1886       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1887     }
1888 
1889     /* next, compute all the lengths */
1890     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1891     olens = dlens + m;
1892     for (i=0; i<m; i++) {
1893       jend = ii[i+1] - ii[i];
1894       olen = 0;
1895       dlen = 0;
1896       for (j=0; j<jend; j++) {
1897         if (*jj < rstart || *jj >= rend) olen++;
1898         else dlen++;
1899         jj++;
1900       }
1901       olens[i] = olen;
1902       dlens[i] = dlen;
1903     }
1904     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1905     ierr = PetscFree(dlens);CHKERRQ(ierr);
1906   } else {
1907     int ml,nl;
1908 
1909     M = *newmat;
1910     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1911     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1912     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1913     /*
1914          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1915        rather than the slower MatSetValues().
1916     */
1917     M->was_assembled = PETSC_TRUE;
1918     M->assembled     = PETSC_FALSE;
1919   }
1920   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1921   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1922   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1923   ii  = aij->i;
1924   jj  = aij->j;
1925   aa  = aij->a;
1926   for (i=0; i<m; i++) {
1927     row   = rstart + i;
1928     nz    = ii[i+1] - ii[i];
1929     cwork = jj;     jj += nz;
1930     vwork = aa;     aa += nz;
1931     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1932   }
1933 
1934   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1935   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1936   *newmat = M;
1937 
1938   /* save submatrix used in processor for next request */
1939   if (call ==  MAT_INITIAL_MATRIX) {
1940     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1941     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1942   }
1943 
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1949 /*@C
1950    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1951    (the default parallel PETSc format).  For good matrix assembly performance
1952    the user should preallocate the matrix storage by setting the parameters
1953    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1954    performance can be increased by more than a factor of 50.
1955 
1956    Collective on MPI_Comm
1957 
1958    Input Parameters:
1959 +  A - the matrix
1960 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1961            (same value is used for all local rows)
1962 .  d_nnz - array containing the number of nonzeros in the various rows of the
1963            DIAGONAL portion of the local submatrix (possibly different for each row)
1964            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1965            The size of this array is equal to the number of local rows, i.e 'm'.
1966            You must leave room for the diagonal entry even if it is zero.
1967 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1968            submatrix (same value is used for all local rows).
1969 -  o_nnz - array containing the number of nonzeros in the various rows of the
1970            OFF-DIAGONAL portion of the local submatrix (possibly different for
1971            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1972            structure. The size of this array is equal to the number
1973            of local rows, i.e 'm'.
1974 
1975    The AIJ format (also called the Yale sparse matrix format or
1976    compressed row storage), is fully compatible with standard Fortran 77
1977    storage.  That is, the stored row and column indices can begin at
1978    either one (as in Fortran) or zero.  See the users manual for details.
1979 
1980    The user MUST specify either the local or global matrix dimensions
1981    (possibly both).
1982 
1983    The parallel matrix is partitioned such that the first m0 rows belong to
1984    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1985    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1986 
1987    The DIAGONAL portion of the local submatrix of a processor can be defined
1988    as the submatrix which is obtained by extraction the part corresponding
1989    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1990    first row that belongs to the processor, and r2 is the last row belonging
1991    to the this processor. This is a square mxm matrix. The remaining portion
1992    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1993 
1994    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1995 
1996    By default, this format uses inodes (identical nodes) when possible.
1997    We search for consecutive rows with the same nonzero structure, thereby
1998    reusing matrix information to achieve increased efficiency.
1999 
2000    Options Database Keys:
2001 +  -mat_aij_no_inode  - Do not use inodes
2002 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2003 -  -mat_aij_oneindex - Internally use indexing starting at 1
2004         rather than 0.  Note that when calling MatSetValues(),
2005         the user still MUST index entries starting at 0!
2006 
2007    Example usage:
2008 
2009    Consider the following 8x8 matrix with 34 non-zero values, that is
2010    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2011    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2012    as follows:
2013 
2014 .vb
2015             1  2  0  |  0  3  0  |  0  4
2016     Proc0   0  5  6  |  7  0  0  |  8  0
2017             9  0 10  | 11  0  0  | 12  0
2018     -------------------------------------
2019            13  0 14  | 15 16 17  |  0  0
2020     Proc1   0 18  0  | 19 20 21  |  0  0
2021             0  0  0  | 22 23  0  | 24  0
2022     -------------------------------------
2023     Proc2  25 26 27  |  0  0 28  | 29  0
2024            30  0  0  | 31 32 33  |  0 34
2025 .ve
2026 
2027    This can be represented as a collection of submatrices as:
2028 
2029 .vb
2030       A B C
2031       D E F
2032       G H I
2033 .ve
2034 
2035    Where the submatrices A,B,C are owned by proc0, D,E,F are
2036    owned by proc1, G,H,I are owned by proc2.
2037 
2038    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2039    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2040    The 'M','N' parameters are 8,8, and have the same values on all procs.
2041 
2042    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2043    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2044    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2045    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2046    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2047    matrix, ans [DF] as another SeqAIJ matrix.
2048 
2049    When d_nz, o_nz parameters are specified, d_nz storage elements are
2050    allocated for every row of the local diagonal submatrix, and o_nz
2051    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2052    One way to choose d_nz and o_nz is to use the max nonzerors per local
2053    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2054    In this case, the values of d_nz,o_nz are:
2055 .vb
2056      proc0 : dnz = 2, o_nz = 2
2057      proc1 : dnz = 3, o_nz = 2
2058      proc2 : dnz = 1, o_nz = 4
2059 .ve
2060    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2061    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2062    for proc3. i.e we are using 12+15+10=37 storage locations to store
2063    34 values.
2064 
2065    When d_nnz, o_nnz parameters are specified, the storage is specified
2066    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2067    In the above case the values for d_nnz,o_nnz are:
2068 .vb
2069      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2070      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2071      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2072 .ve
2073    Here the space allocated is sum of all the above values i.e 34, and
2074    hence pre-allocation is perfect.
2075 
2076    Level: intermediate
2077 
2078 .keywords: matrix, aij, compressed row, sparse, parallel
2079 
2080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2081 @*/
2082 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2083 {
2084   Mat_MPIAIJ   *b;
2085   int          ierr,i;
2086   PetscTruth   flg2;
2087 
2088   PetscFunctionBegin;
2089   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2090   if (!flg2) PetscFunctionReturn(0);
2091   B->preallocated = PETSC_TRUE;
2092   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2093   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2094   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2095   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2096   if (d_nnz) {
2097     for (i=0; i<B->m; i++) {
2098       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2099     }
2100   }
2101   if (o_nnz) {
2102     for (i=0; i<B->m; i++) {
2103       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2104     }
2105   }
2106   b = (Mat_MPIAIJ*)B->data;
2107 
2108   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2109   PetscLogObjectParent(B,b->A);
2110   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2111   PetscLogObjectParent(B,b->B);
2112 
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatCreateMPIAIJ"
2118 /*@C
2119    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2120    (the default parallel PETSc format).  For good matrix assembly performance
2121    the user should preallocate the matrix storage by setting the parameters
2122    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2123    performance can be increased by more than a factor of 50.
2124 
2125    Collective on MPI_Comm
2126 
2127    Input Parameters:
2128 +  comm - MPI communicator
2129 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2130            This value should be the same as the local size used in creating the
2131            y vector for the matrix-vector product y = Ax.
2132 .  n - This value should be the same as the local size used in creating the
2133        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2134        calculated if N is given) For square matrices n is almost always m.
2135 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2136 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2137 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2138            (same value is used for all local rows)
2139 .  d_nnz - array containing the number of nonzeros in the various rows of the
2140            DIAGONAL portion of the local submatrix (possibly different for each row)
2141            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2142            The size of this array is equal to the number of local rows, i.e 'm'.
2143            You must leave room for the diagonal entry even if it is zero.
2144 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2145            submatrix (same value is used for all local rows).
2146 -  o_nnz - array containing the number of nonzeros in the various rows of the
2147            OFF-DIAGONAL portion of the local submatrix (possibly different for
2148            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2149            structure. The size of this array is equal to the number
2150            of local rows, i.e 'm'.
2151 
2152    Output Parameter:
2153 .  A - the matrix
2154 
2155    Notes:
2156    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2157    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2158    storage requirements for this matrix.
2159 
2160    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2161    processor than it must be used on all processors that share the object for
2162    that argument.
2163 
2164    The AIJ format (also called the Yale sparse matrix format or
2165    compressed row storage), is fully compatible with standard Fortran 77
2166    storage.  That is, the stored row and column indices can begin at
2167    either one (as in Fortran) or zero.  See the users manual for details.
2168 
2169    The user MUST specify either the local or global matrix dimensions
2170    (possibly both).
2171 
2172    The parallel matrix is partitioned such that the first m0 rows belong to
2173    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2174    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2175 
2176    The DIAGONAL portion of the local submatrix of a processor can be defined
2177    as the submatrix which is obtained by extraction the part corresponding
2178    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2179    first row that belongs to the processor, and r2 is the last row belonging
2180    to the this processor. This is a square mxm matrix. The remaining portion
2181    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2182 
2183    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2184 
2185    By default, this format uses inodes (identical nodes) when possible.
2186    We search for consecutive rows with the same nonzero structure, thereby
2187    reusing matrix information to achieve increased efficiency.
2188 
2189    Options Database Keys:
2190 +  -mat_aij_no_inode  - Do not use inodes
2191 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2192 -  -mat_aij_oneindex - Internally use indexing starting at 1
2193         rather than 0.  Note that when calling MatSetValues(),
2194         the user still MUST index entries starting at 0!
2195 
2196 
2197    Example usage:
2198 
2199    Consider the following 8x8 matrix with 34 non-zero values, that is
2200    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2201    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2202    as follows:
2203 
2204 .vb
2205             1  2  0  |  0  3  0  |  0  4
2206     Proc0   0  5  6  |  7  0  0  |  8  0
2207             9  0 10  | 11  0  0  | 12  0
2208     -------------------------------------
2209            13  0 14  | 15 16 17  |  0  0
2210     Proc1   0 18  0  | 19 20 21  |  0  0
2211             0  0  0  | 22 23  0  | 24  0
2212     -------------------------------------
2213     Proc2  25 26 27  |  0  0 28  | 29  0
2214            30  0  0  | 31 32 33  |  0 34
2215 .ve
2216 
2217    This can be represented as a collection of submatrices as:
2218 
2219 .vb
2220       A B C
2221       D E F
2222       G H I
2223 .ve
2224 
2225    Where the submatrices A,B,C are owned by proc0, D,E,F are
2226    owned by proc1, G,H,I are owned by proc2.
2227 
2228    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2229    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2230    The 'M','N' parameters are 8,8, and have the same values on all procs.
2231 
2232    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2233    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2234    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2235    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2236    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2237    matrix, ans [DF] as another SeqAIJ matrix.
2238 
2239    When d_nz, o_nz parameters are specified, d_nz storage elements are
2240    allocated for every row of the local diagonal submatrix, and o_nz
2241    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2242    One way to choose d_nz and o_nz is to use the max nonzerors per local
2243    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2244    In this case, the values of d_nz,o_nz are:
2245 .vb
2246      proc0 : dnz = 2, o_nz = 2
2247      proc1 : dnz = 3, o_nz = 2
2248      proc2 : dnz = 1, o_nz = 4
2249 .ve
2250    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2251    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2252    for proc3. i.e we are using 12+15+10=37 storage locations to store
2253    34 values.
2254 
2255    When d_nnz, o_nnz parameters are specified, the storage is specified
2256    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2257    In the above case the values for d_nnz,o_nnz are:
2258 .vb
2259      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2260      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2261      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2262 .ve
2263    Here the space allocated is sum of all the above values i.e 34, and
2264    hence pre-allocation is perfect.
2265 
2266    Level: intermediate
2267 
2268 .keywords: matrix, aij, compressed row, sparse, parallel
2269 
2270 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2271 @*/
2272 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2273 {
2274   int ierr,size;
2275 
2276   PetscFunctionBegin;
2277   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2279   if (size > 1) {
2280     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2281     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2282   } else {
2283     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2284     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2285   }
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2291 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2292 {
2293   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2294   PetscFunctionBegin;
2295   *Ad     = a->A;
2296   *Ao     = a->B;
2297   *colmap = a->garray;
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2303 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2304 {
2305   int        ierr;
2306   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2307 
2308   PetscFunctionBegin;
2309   if (coloring->ctype == IS_COLORING_LOCAL) {
2310     int        *allcolors,*colors,i;
2311     ISColoring ocoloring;
2312 
2313     /* set coloring for diagonal portion */
2314     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2315 
2316     /* set coloring for off-diagonal portion */
2317     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2318     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2319     for (i=0; i<a->B->n; i++) {
2320       colors[i] = allcolors[a->garray[i]];
2321     }
2322     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2323     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2324     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2325     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2326   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2327     int        *colors,i,*larray;
2328     ISColoring ocoloring;
2329 
2330     /* set coloring for diagonal portion */
2331     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2332     for (i=0; i<a->A->n; i++) {
2333       larray[i] = i + a->cstart;
2334     }
2335     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2336     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2337     for (i=0; i<a->A->n; i++) {
2338       colors[i] = coloring->colors[larray[i]];
2339     }
2340     ierr = PetscFree(larray);CHKERRQ(ierr);
2341     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2342     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2343     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2344 
2345     /* set coloring for off-diagonal portion */
2346     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2347     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2348     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2349     for (i=0; i<a->B->n; i++) {
2350       colors[i] = coloring->colors[larray[i]];
2351     }
2352     ierr = PetscFree(larray);CHKERRQ(ierr);
2353     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2354     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2355     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2356   } else {
2357     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2358   }
2359 
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2365 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2366 {
2367   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2368   int        ierr;
2369 
2370   PetscFunctionBegin;
2371   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2372   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2378 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2379 {
2380   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2381   int        ierr;
2382 
2383   PetscFunctionBegin;
2384   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2385   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388