xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e605e3f2ce6bc23be6560fc66e15402fe158ff8e)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
19 a slightly higher hash table cost; without it, it is not scalable (each processor
20 holds an order-N integer array), but access is fast.
21 */
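/*
   For illustration only (these column numbers are made up): if the off-diagonal
   block B has garray = {3,7,12}, the resulting colmap sends global column 3 to
   local 0, 7 to 1 and 12 to 2.  Both storage schemes keep the local index + 1 so
   that a result of 0 can mean "this global column is not present on this process";
   callers therefore subtract one after the lookup, as in

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   where gcol is a global column number and lcol the local column number in B.
*/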
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
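/*
   The two macros below are hand-inlined variants of MatSetValues_SeqAIJ() for the
   diagonal block (A) and the off-diagonal block (B).  Each inserts or adds a single
   (row,col,value) entry into the block's CSR arrays: a short binary search narrows
   the search range within the row, a linear scan locates the column, and if the row
   has no free slot left the i/j/a arrays are reallocated with CHUNKSIZE extra
   entries reserved for that row.
*/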
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
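/*
   MatSetValues_MPIAIJ() routes each entry by ownership: if both the row and the
   column are owned locally the entry goes into the diagonal block aij->A; a locally
   owned row with an off-processor column goes into aij->B (after a previous assembly
   the global column is translated to B's compressed local numbering through colmap);
   entries for rows owned by another process are queued in the stash, unless
   MAT_IGNORE_OFF_PROC_ENTRIES was set, and are communicated during assembly.
*/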
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col --;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
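/*
   MatAssemblyBegin() checks that all processes are using the same InsertMode and
   starts sending the stashed off-processor entries; MatAssemblyEnd() receives them,
   replays them through MatSetValues_MPIAIJ(), assembles the diagonal block, sets up
   the ghost-value scatter on the first final assembly, and then assembles the
   off-diagonal block.
*/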
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are in the same mode: either INSERT_VALUES or ADD_VALUES */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358   PetscTruth  flag;
359 
360   PetscFunctionBegin;
361   if (!aij->donotstash) {
362     while (1) {
363       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
364       if (!flg) break;
365 
366       for (i=0; i<n;) {
367         /* Now identify the consecutive vals belonging to the same row */
368         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
369         if (j < n) ncols = j-i;
370         else       ncols = n-i;
371         /* Now assemble all these values with a single function call */
372         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
373         i = j;
374       }
375     }
376     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
377   }
378 
379   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
380   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
381 
382   /* determine if any processor has disassembled; if so we must
383      also disassemble ourselves, so that we may reassemble. */
384   /*
385      if the nonzero structure of the submatrix B cannot change then we know that
386      no processor disassembled, and we can skip this step
387   */
388   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
389     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
390     if (mat->was_assembled && !other_disassembled) {
391       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
392     }
393   }
394 
395   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
396     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
397   }
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405 #if defined(PETSC_HAVE_SUPERLUDIST)
406   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
407   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
408 #endif
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
414 int MatZeroEntries_MPIAIJ(Mat A)
415 {
416   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
417   int        ierr;
418 
419   PetscFunctionBegin;
420   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
421   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
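/*
   MatZeroRows: the rows in the index set may be owned by other processes, so each
   process first bins its requests by owning process (using the rowners table), the
   message counts and lengths are combined with an MPI_Allreduce using PetscMaxSum_Op,
   the row indices are exchanged with nonblocking sends and receives, and each process
   then zeros the rows it owns, optionally inserting the supplied diagonal value.
*/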
425 #undef __FUNCT__
426 #define __FUNCT__ "MatZeroRows_MPIAIJ"
427 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
428 {
429   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
430   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
431   int            *procs,*nprocs,j,idx,nsends,*work,row;
432   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
433   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
434   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
435   MPI_Comm       comm = A->comm;
436   MPI_Request    *send_waits,*recv_waits;
437   MPI_Status     recv_status,*send_status;
438   IS             istmp;
439   PetscTruth     found;
440 
441   PetscFunctionBegin;
442   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
443   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
444 
445   /*  first count number of contributors to each processor */
446   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
447   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
448   procs  = nprocs + size;
449   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
450   for (i=0; i<N; i++) {
451     idx = rows[i];
452     found = PETSC_FALSE;
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
456       }
457     }
458     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459   }
460   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
461 
462   /* inform other processors of number of messages and max length*/
463   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
464   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
465   nrecvs = work[size+rank];
466   nmax   = work[rank];
467   ierr   = PetscFree(work);CHKERRQ(ierr);
468 
469   /* post receives:   */
470   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
471   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472   for (i=0; i<nrecvs; i++) {
473     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
474   }
475 
476   /* do sends:
477       1) starts[i] gives the starting index in svalues for stuff going to
478          the ith processor
479   */
480   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
481   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
482   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
483   starts[0] = 0;
484   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
485   for (i=0; i<N; i++) {
486     svalues[starts[owner[i]]++] = rows[i];
487   }
488   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
489 
490   starts[0] = 0;
491   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
492   count = 0;
493   for (i=0; i<size; i++) {
494     if (procs[i]) {
495       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
496     }
497   }
498   ierr = PetscFree(starts);CHKERRQ(ierr);
499 
500   base = owners[rank];
501 
502   /*  wait on receives */
503   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
504   source = lens + nrecvs;
505   count  = nrecvs; slen = 0;
506   while (count) {
507     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
508     /* unpack receives into our local space */
509     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
510     source[imdex]  = recv_status.MPI_SOURCE;
511     lens[imdex]    = n;
512     slen          += n;
513     count--;
514   }
515   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
516 
517   /* move the received row indices into local numbering */
518   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<nrecvs; i++) {
521     values = rvalues + i*nmax;
522     for (j=0; j<lens[i]; j++) {
523       lrows[count++] = values[j] - base;
524     }
525   }
526   ierr = PetscFree(rvalues);CHKERRQ(ierr);
527   ierr = PetscFree(lens);CHKERRQ(ierr);
528   ierr = PetscFree(owner);CHKERRQ(ierr);
529   ierr = PetscFree(nprocs);CHKERRQ(ierr);
530 
531   /* actually zap the local rows */
532   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
533   PetscLogObjectParent(A,istmp);
534 
535   /*
536         Zero the required rows. If the "diagonal block" of the matrix
537      is square and the user wishes to set the diagonal we use separate
538      code so that MatSetValues() is not called for each diagonal entry, which would
539      allocate new memory with many mallocs and slow things down.
540 
541        Contributed by: Mathew Knepley
542   */
543   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
544   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
545   if (diag && (l->A->M == l->A->N)) {
546     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
547   } else if (diag) {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
550       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
552     }
553     for (i = 0; i < slen; i++) {
554       row  = lrows[i] + rstart;
555       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
556     }
557     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559   } else {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561   }
562   ierr = ISDestroy(istmp);CHKERRQ(ierr);
563   ierr = PetscFree(lrows);CHKERRQ(ierr);
564 
565   /* wait on sends */
566   if (nsends) {
567     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
568     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
569     ierr = PetscFree(send_status);CHKERRQ(ierr);
570   }
571   ierr = PetscFree(send_waits);CHKERRQ(ierr);
572   ierr = PetscFree(svalues);CHKERRQ(ierr);
573 
574   PetscFunctionReturn(0);
575 }
576 
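/*
   MatMult computes yy = A*xx(local) + B*xx(ghost): the scatter that gathers the
   needed off-processor values of xx into lvec is overlapped with the multiply by the
   diagonal block, and the off-diagonal block then adds its contribution.
*/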
577 #undef __FUNCT__
578 #define __FUNCT__ "MatMult_MPIAIJ"
579 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
580 {
581   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
582   int        ierr,nt;
583 
584   PetscFunctionBegin;
585   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
586   if (nt != A->n) {
587     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
588   }
589   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
590   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
591   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatMultAdd_MPIAIJ"
598 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
599 {
600   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
601   int        ierr;
602 
603   PetscFunctionBegin;
604   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
605   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
606   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
613 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
614 {
615   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
616   int        ierr;
617 
618   PetscFunctionBegin;
619   /* do nondiagonal part */
620   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
621   /* send it on its way */
622   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   /* do local part */
624   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
625   /* receive remote parts: note this assumes the values are not actually */
626   /* inserted into yy until VecScatterEnd() below, which is true for this implementation */
627   /* but may not always be true. */
628   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
634 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
635 {
636   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
637   int        ierr;
638 
639   PetscFunctionBegin;
640   /* do nondiagonal part */
641   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
642   /* send it on its way */
643   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   /* do local part */
645   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
646   /* receive remote parts: note this assumes the values are not actually */
647   /* inserted into zz until VecScatterEnd() below, which is true for this implementation */
648   /* but may not always be true. */
649   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 /*
654   This only works correctly for square matrices where the subblock A->A is the
655    diagonal block
656 */
657 #undef __FUNCT__
658 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
659 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
660 {
661   int        ierr;
662   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
663 
664   PetscFunctionBegin;
665   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
666   if (a->rstart != a->cstart || a->rend != a->cend) {
667     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
668   }
669   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 
673 #undef __FUNCT__
674 #define __FUNCT__ "MatScale_MPIAIJ"
675 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
676 {
677   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
678   int        ierr;
679 
680   PetscFunctionBegin;
681   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
682   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatDestroy_MPIAIJ"
688 int MatDestroy_MPIAIJ(Mat mat)
689 {
690   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
691   int        ierr;
692 
693   PetscFunctionBegin;
694 #if defined(PETSC_USE_LOG)
695   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
696 #endif
697   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
698   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
699   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
700   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
701 #if defined (PETSC_USE_CTABLE)
702   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
703 #else
704   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
705 #endif
706   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
707   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
708   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
709   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
710   ierr = PetscFree(aij);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
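/*
   Viewing: the info formats print only per-process statistics.  For the other
   formats, on more than one process the whole matrix is first assembled onto the
   first process (translating B's compressed column indices back to global numbering
   through garray) and that copy is viewed through the singleton viewer; every
   process participates so that the graphics synchronization works.
*/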
714 #undef __FUNCT__
715 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
716 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
717 {
718   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
719   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
720   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
721   PetscTruth        isdraw,isascii,flg;
722   PetscViewer       sviewer;
723   PetscViewerFormat format;
724 
725   PetscFunctionBegin;
726   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
727   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
728   if (isascii) {
729     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
730     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
731       MatInfo info;
732       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
733       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
734       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
735       if (flg) {
736         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
737 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
738       } else {
739         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
740 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
741       }
742       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
743       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
744       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
747       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
748       PetscFunctionReturn(0);
749     } else if (format == PETSC_VIEWER_ASCII_INFO) {
750       PetscFunctionReturn(0);
751     }
752   } else if (isdraw) {
753     PetscDraw       draw;
754     PetscTruth isnull;
755     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
756     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
757   }
758 
759   if (size == 1) {
760     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
761     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
762   } else {
763     /* assemble the entire matrix onto first processor. */
764     Mat         A;
765     Mat_SeqAIJ *Aloc;
766     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
767     PetscScalar *a;
768 
769     if (!rank) {
770       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
771     } else {
772       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     }
774     PetscLogObjectParent(mat,A);
775 
776     /* copy over the A part */
777     Aloc = (Mat_SeqAIJ*)aij->A->data;
778     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
779     row = aij->rstart;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
781     for (i=0; i<m; i++) {
782       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
783       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
784     }
785     aj = Aloc->j;
786     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
787 
788     /* copy over the B part */
789     Aloc = (Mat_SeqAIJ*)aij->B->data;
790     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
791     row  = aij->rstart;
792     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
793     ct   = cols;
794     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
795     for (i=0; i<m; i++) {
796       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
797       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
798     }
799     ierr = PetscFree(ct);CHKERRQ(ierr);
800     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
801     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
802     /*
803        Every process has to participate in the call that draws the matrix since the
804        graphics waits are synchronized across all processors that share the PetscDraw object
805     */
806     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
807     if (!rank) {
808       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
809       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
810     }
811     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
812     ierr = MatDestroy(A);CHKERRQ(ierr);
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatView_MPIAIJ"
819 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
820 {
821   int        ierr;
822   PetscTruth isascii,isdraw,issocket,isbinary;
823 
824   PetscFunctionBegin;
825   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
826   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
827   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
828   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
829   if (isascii || isdraw || isbinary || issocket) {
830     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
831   } else {
832     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 
838 
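/*
   MatRelax supports only the "local" SOR variants, applied block-Jacobi style: each
   outer iteration gathers the current ghost values of xx, forms bb1 = bb - B*xx(ghost),
   and then applies the requested sequential SOR sweep of the diagonal block to bb1.
   A true parallel (global) SOR is not supported.
*/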
839 #undef __FUNCT__
840 #define __FUNCT__ "MatRelax_MPIAIJ"
841 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
842 {
843   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
844   int          ierr;
845   Vec          bb1;
846   PetscScalar  mone=-1.0;
847 
848   PetscFunctionBegin;
849 
850   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
851 
852   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
853     if (flag & SOR_ZERO_INITIAL_GUESS) {
854       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
855       its--;
856     }
857 
858     while (its--) {
859       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
860       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
861 
862       /* update rhs: bb1 = bb - B*x */
863       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
864       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
865 
866       /* local sweep */
867       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
868       CHKERRQ(ierr);
869     }
870   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
873       its--;
874     }
875     while (its--) {
876       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
877       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
878 
879       /* update rhs: bb1 = bb - B*x */
880       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
881       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
882 
883       /* local sweep */
884       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
885       CHKERRQ(ierr);
886     }
887   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
888     if (flag & SOR_ZERO_INITIAL_GUESS) {
889       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
890       its--;
891     }
892     while (its--) {
893       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
894       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
895 
896       /* update rhs: bb1 = bb - B*x */
897       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
898       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
899 
900       /* local sweep */
901       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
902       CHKERRQ(ierr);
903     }
904   } else {
905     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
906   }
907 
908   ierr = VecDestroy(bb1);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "MatGetInfo_MPIAIJ"
914 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
915 {
916   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
917   Mat        A = mat->A,B = mat->B;
918   int        ierr;
919   PetscReal  isend[5],irecv[5];
920 
921   PetscFunctionBegin;
922   info->block_size     = 1.0;
923   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
924   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
925   isend[3] = info->memory;  isend[4] = info->mallocs;
926   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
927   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
928   isend[3] += info->memory;  isend[4] += info->mallocs;
929   if (flag == MAT_LOCAL) {
930     info->nz_used      = isend[0];
931     info->nz_allocated = isend[1];
932     info->nz_unneeded  = isend[2];
933     info->memory       = isend[3];
934     info->mallocs      = isend[4];
935   } else if (flag == MAT_GLOBAL_MAX) {
936     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
937     info->nz_used      = irecv[0];
938     info->nz_allocated = irecv[1];
939     info->nz_unneeded  = irecv[2];
940     info->memory       = irecv[3];
941     info->mallocs      = irecv[4];
942   } else if (flag == MAT_GLOBAL_SUM) {
943     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
944     info->nz_used      = irecv[0];
945     info->nz_allocated = irecv[1];
946     info->nz_unneeded  = irecv[2];
947     info->memory       = irecv[3];
948     info->mallocs      = irecv[4];
949   }
950   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
951   info->fill_ratio_needed = 0;
952   info->factor_mallocs    = 0;
953   info->rows_global       = (double)matin->M;
954   info->columns_global    = (double)matin->N;
955   info->rows_local        = (double)matin->m;
956   info->columns_local     = (double)matin->N;
957 
958   PetscFunctionReturn(0);
959 }
960 
961 #undef __FUNCT__
962 #define __FUNCT__ "MatSetOption_MPIAIJ"
963 int MatSetOption_MPIAIJ(Mat A,MatOption op)
964 {
965   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
966   int        ierr;
967 
968   PetscFunctionBegin;
969   switch (op) {
970   case MAT_NO_NEW_NONZERO_LOCATIONS:
971   case MAT_YES_NEW_NONZERO_LOCATIONS:
972   case MAT_COLUMNS_UNSORTED:
973   case MAT_COLUMNS_SORTED:
974   case MAT_NEW_NONZERO_ALLOCATION_ERR:
975   case MAT_KEEP_ZEROED_ROWS:
976   case MAT_NEW_NONZERO_LOCATION_ERR:
977   case MAT_USE_INODES:
978   case MAT_DO_NOT_USE_INODES:
979   case MAT_IGNORE_ZERO_ENTRIES:
980     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
981     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
982     break;
983   case MAT_ROW_ORIENTED:
984     a->roworiented = PETSC_TRUE;
985     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
986     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
987     break;
988   case MAT_ROWS_SORTED:
989   case MAT_ROWS_UNSORTED:
990   case MAT_YES_NEW_DIAGONALS:
991   case MAT_USE_SINGLE_PRECISION_SOLVES:
992     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
993     break;
994   case MAT_COLUMN_ORIENTED:
995     a->roworiented = PETSC_FALSE;
996     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
997     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
998     break;
999   case MAT_IGNORE_OFF_PROC_ENTRIES:
1000     a->donotstash = PETSC_TRUE;
1001     break;
1002   case MAT_NO_NEW_DIAGONALS:
1003     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1004     break;
1005   default:
1006     SETERRQ(PETSC_ERR_SUP,"unknown option");
1007     break;
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
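/*
   MatGetRow returns a locally owned row as a single column-sorted list: entries of
   the off-diagonal block B whose global columns precede the diagonal column range
   come first, then the diagonal block A (shifted by cstart), then the remaining B
   entries.  The merged copy is built in the rowvalues/rowindices work arrays, which
   are allocated once with room for the longest local row.
*/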
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatGetRow_MPIAIJ"
1014 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1015 {
1016   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1017   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1018   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1019   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1020   int          *cmap,*idx_p;
1021 
1022   PetscFunctionBegin;
1023   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1024   mat->getrowactive = PETSC_TRUE;
1025 
1026   if (!mat->rowvalues && (idx || v)) {
1027     /*
1028         allocate enough space to hold information from the longest row.
1029     */
1030     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1031     int     max = 1,tmp;
1032     for (i=0; i<matin->m; i++) {
1033       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1034       if (max < tmp) { max = tmp; }
1035     }
1036     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1037     mat->rowindices = (int*)(mat->rowvalues + max);
1038   }
1039 
1040   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1041   lrow = row - rstart;
1042 
1043   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1044   if (!v)   {pvA = 0; pvB = 0;}
1045   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1046   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1047   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1048   nztot = nzA + nzB;
1049 
1050   cmap  = mat->garray;
1051   if (v  || idx) {
1052     if (nztot) {
1053       /* Sort by increasing column numbers, assuming A and B already sorted */
1054       int imark = -1;
1055       if (v) {
1056         *v = v_p = mat->rowvalues;
1057         for (i=0; i<nzB; i++) {
1058           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1059           else break;
1060         }
1061         imark = i;
1062         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1063         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1064       }
1065       if (idx) {
1066         *idx = idx_p = mat->rowindices;
1067         if (imark > -1) {
1068           for (i=0; i<imark; i++) {
1069             idx_p[i] = cmap[cworkB[i]];
1070           }
1071         } else {
1072           for (i=0; i<nzB; i++) {
1073             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1074             else break;
1075           }
1076           imark = i;
1077         }
1078         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1079         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1080       }
1081     } else {
1082       if (idx) *idx = 0;
1083       if (v)   *v   = 0;
1084     }
1085   }
1086   *nz = nztot;
1087   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1088   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1094 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1095 {
1096   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1097 
1098   PetscFunctionBegin;
1099   if (aij->getrowactive == PETSC_FALSE) {
1100     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1101   }
1102   aij->getrowactive = PETSC_FALSE;
1103   PetscFunctionReturn(0);
1104 }
1105 
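/*
   MatNorm: the Frobenius norm is accumulated from the local nonzeros of A and B and
   combined with MPI_SUM; the 1-norm (largest column sum) uses a work array of length
   N that is summed across processes; the infinity norm (largest row sum) is reduced
   with MPI_MAX.  The 2-norm is not supported in parallel.
*/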
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatNorm_MPIAIJ"
1108 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1109 {
1110   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1111   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1112   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1113   PetscReal    sum = 0.0;
1114   PetscScalar  *v;
1115 
1116   PetscFunctionBegin;
1117   if (aij->size == 1) {
1118     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1119   } else {
1120     if (type == NORM_FROBENIUS) {
1121       v = amat->a;
1122       for (i=0; i<amat->nz; i++) {
1123 #if defined(PETSC_USE_COMPLEX)
1124         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1125 #else
1126         sum += (*v)*(*v); v++;
1127 #endif
1128       }
1129       v = bmat->a;
1130       for (i=0; i<bmat->nz; i++) {
1131 #if defined(PETSC_USE_COMPLEX)
1132         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1133 #else
1134         sum += (*v)*(*v); v++;
1135 #endif
1136       }
1137       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1138       *norm = sqrt(*norm);
1139     } else if (type == NORM_1) { /* max column norm */
1140       PetscReal *tmp,*tmp2;
1141       int    *jj,*garray = aij->garray;
1142       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1143       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1144       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1145       *norm = 0.0;
1146       v = amat->a; jj = amat->j;
1147       for (j=0; j<amat->nz; j++) {
1148         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1149       }
1150       v = bmat->a; jj = bmat->j;
1151       for (j=0; j<bmat->nz; j++) {
1152         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1153       }
1154       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1155       for (j=0; j<mat->N; j++) {
1156         if (tmp2[j] > *norm) *norm = tmp2[j];
1157       }
1158       ierr = PetscFree(tmp);CHKERRQ(ierr);
1159       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1160     } else if (type == NORM_INFINITY) { /* max row norm */
1161       PetscReal ntemp = 0.0;
1162       for (j=0; j<aij->A->m; j++) {
1163         v = amat->a + amat->i[j] + shift;
1164         sum = 0.0;
1165         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1166           sum += PetscAbsScalar(*v); v++;
1167         }
1168         v = bmat->a + bmat->i[j] + shift;
1169         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1170           sum += PetscAbsScalar(*v); v++;
1171         }
1172         if (sum > ntemp) ntemp = sum;
1173       }
1174       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1175     } else {
1176       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1177     }
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
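/*
   MatTranspose builds the transpose explicitly: a new MPIAIJ matrix with the row and
   column sizes swapped is created and the local A and B parts are inserted with
   MatSetValues() one input row at a time (so each becomes a column of the result),
   with B's compressed column indices translated back to global numbering through
   garray.  For the in-place case the result replaces A via MatHeaderCopy().
*/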
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatTranspose_MPIAIJ"
1184 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1185 {
1186   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1187   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1188   int          ierr,shift = Aloc->indexshift;
1189   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1190   Mat          B;
1191   PetscScalar  *array;
1192 
1193   PetscFunctionBegin;
1194   if (!matout && M != N) {
1195     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1196   }
1197 
1198   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1199 
1200   /* copy over the A part */
1201   Aloc = (Mat_SeqAIJ*)a->A->data;
1202   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1203   row = a->rstart;
1204   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1205   for (i=0; i<m; i++) {
1206     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1207     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1208   }
1209   aj = Aloc->j;
1210   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1211 
1212   /* copy over the B part */
1213   Aloc = (Mat_SeqAIJ*)a->B->data;
1214   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1215   row  = a->rstart;
1216   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1217   ct   = cols;
1218   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1219   for (i=0; i<m; i++) {
1220     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1221     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1222   }
1223   ierr = PetscFree(ct);CHKERRQ(ierr);
1224   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1225   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1226   if (matout) {
1227     *matout = B;
1228   } else {
1229     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1230   }
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1236 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1237 {
1238   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1239   Mat        a = aij->A,b = aij->B;
1240   int        ierr,s1,s2,s3;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1244   if (rr) {
1245     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1246     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1247     /* Overlap communication with computation. */
1248     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1249   }
1250   if (ll) {
1251     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1252     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1253     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1254   }
1255   /* scale the diagonal block */
1256   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1257 
1258   if (rr) {
1259     /* Do a scatter end and then right scale the off-diagonal block */
1260     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1261     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1262   }
1263 
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1270 int MatPrintHelp_MPIAIJ(Mat A)
1271 {
1272   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1273   int        ierr;
1274 
1275   PetscFunctionBegin;
1276   if (!a->rank) {
1277     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1278   }
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1284 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1285 {
1286   PetscFunctionBegin;
1287   *bs = 1;
1288   PetscFunctionReturn(0);
1289 }
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1292 int MatSetUnfactored_MPIAIJ(Mat A)
1293 {
1294   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1295   int        ierr;
1296 
1297   PetscFunctionBegin;
1298   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatEqual_MPIAIJ"
1304 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1305 {
1306   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1307   Mat        a,b,c,d;
1308   PetscTruth flg;
1309   int        ierr;
1310 
1311   PetscFunctionBegin;
1312   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1313   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1314   a = matA->A; b = matA->B;
1315   c = matB->A; d = matB->B;
1316 
1317   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1318   if (flg == PETSC_TRUE) {
1319     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1320   }
1321   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatCopy_MPIAIJ"
1327 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1328 {
1329   int        ierr;
1330   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1331   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1332   PetscTruth flg;
1333 
1334   PetscFunctionBegin;
1335   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1336   if (str != SAME_NONZERO_PATTERN || !flg) {
1337     /* because of the column compression in the off-processor part of the matrix a->B,
1338        the number of columns in a->B and b->B may be different, hence we cannot call
1339        MatCopy() directly on the two parts. If need be, we could provide a more
1340        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1341        and then copying the submatrices */
1342     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1343   } else {
1344     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1345     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1346   }
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1352 int MatSetUpPreallocation_MPIAIJ(Mat A)
1353 {
1354   int        ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1362 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1363 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1364 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1365 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1366 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1367 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1368 #endif
1369 
1370 /* -------------------------------------------------------------------*/
1371 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1372        MatGetRow_MPIAIJ,
1373        MatRestoreRow_MPIAIJ,
1374        MatMult_MPIAIJ,
1375        MatMultAdd_MPIAIJ,
1376        MatMultTranspose_MPIAIJ,
1377        MatMultTransposeAdd_MPIAIJ,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        MatRelax_MPIAIJ,
1385        MatTranspose_MPIAIJ,
1386        MatGetInfo_MPIAIJ,
1387        MatEqual_MPIAIJ,
1388        MatGetDiagonal_MPIAIJ,
1389        MatDiagonalScale_MPIAIJ,
1390        MatNorm_MPIAIJ,
1391        MatAssemblyBegin_MPIAIJ,
1392        MatAssemblyEnd_MPIAIJ,
1393        0,
1394        MatSetOption_MPIAIJ,
1395        MatZeroEntries_MPIAIJ,
1396        MatZeroRows_MPIAIJ,
1397 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1398        MatLUFactorSymbolic_MPIAIJ_TFS,
1399 #else
1400        0,
1401 #endif
1402        0,
1403        0,
1404        0,
1405        MatSetUpPreallocation_MPIAIJ,
1406        0,
1407        0,
1408        0,
1409        0,
1410        MatDuplicate_MPIAIJ,
1411        0,
1412        0,
1413        0,
1414        0,
1415        0,
1416        MatGetSubMatrices_MPIAIJ,
1417        MatIncreaseOverlap_MPIAIJ,
1418        MatGetValues_MPIAIJ,
1419        MatCopy_MPIAIJ,
1420        MatPrintHelp_MPIAIJ,
1421        MatScale_MPIAIJ,
1422        0,
1423        0,
1424        0,
1425        MatGetBlockSize_MPIAIJ,
1426        0,
1427        0,
1428        0,
1429        0,
1430        MatFDColoringCreate_MPIAIJ,
1431        0,
1432        MatSetUnfactored_MPIAIJ,
1433        0,
1434        0,
1435        MatGetSubMatrix_MPIAIJ,
1436        MatDestroy_MPIAIJ,
1437        MatView_MPIAIJ,
1438        MatGetPetscMaps_Petsc,
1439        0,
1440        0,
1441        0,
1442        0,
1443        0,
1444        0,
1445        0,
1446        0,
1447        MatSetColoring_MPIAIJ,
1448        MatSetValuesAdic_MPIAIJ,
1449        MatSetValuesAdifor_MPIAIJ
1450 };
1451 
1452 /* ----------------------------------------------------------------------------------------*/
1453 
1454 EXTERN_C_BEGIN
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1457 int MatStoreValues_MPIAIJ(Mat mat)
1458 {
1459   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1460   int        ierr;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1464   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 EXTERN_C_END
1468 
1469 EXTERN_C_BEGIN
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1472 int MatRetrieveValues_MPIAIJ(Mat mat)
1473 {
1474   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1475   int        ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1479   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 EXTERN_C_END
1483 
1484 #include "petscpc.h"
1485 EXTERN_C_BEGIN
1486 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1487 EXTERN_C_END
1488 
1489 EXTERN_C_BEGIN
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatCreate_MPIAIJ"
1492 int MatCreate_MPIAIJ(Mat B)
1493 {
1494   Mat_MPIAIJ   *b;
1495   int          ierr,i,size;
1496   PetscTruth   flg;
1497 
1498   PetscFunctionBegin;
1499   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1500 
1501   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1502   B->data         = (void*)b;
1503   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1504   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1505   B->factor       = 0;
1506   B->assembled    = PETSC_FALSE;
1507   B->mapping      = 0;
1508 
1509   B->insertmode      = NOT_SET_VALUES;
1510   b->size            = size;
1511   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1512 
1513   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1514   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1515 
1516   /* the information in the maps duplicates the information computed below; eventually
1517      we should remove the duplicate information that is not contained in the maps */
1518   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1519   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1520 
1521   /* build local table of row and column ownerships */
1522   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1523   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1524   b->cowners = b->rowners + b->size + 2;
1525   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1526   b->rowners[0] = 0;
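       /* convert the gathered local row counts into a prefix sum so that
          rowners[i] is the first global row owned by process i */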
1527   for (i=2; i<=b->size; i++) {
1528     b->rowners[i] += b->rowners[i-1];
1529   }
1530   b->rstart = b->rowners[b->rank];
1531   b->rend   = b->rowners[b->rank+1];
1532   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1533   b->cowners[0] = 0;
1534   for (i=2; i<=b->size; i++) {
1535     b->cowners[i] += b->cowners[i-1];
1536   }
1537   b->cstart = b->cowners[b->rank];
1538   b->cend   = b->cowners[b->rank+1];
1539 
1540   /* build cache for off-processor entries generated during assembly */
1541   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1542   b->donotstash  = PETSC_FALSE;
1543   b->colmap      = 0;
1544   b->garray      = 0;
1545   b->roworiented = PETSC_TRUE;
1546 
1547   /* stuff used for matrix vector multiply */
1548   b->lvec      = PETSC_NULL;
1549   b->Mvctx     = PETSC_NULL;
1550 
1551   /* stuff for MatGetRow() */
1552   b->rowindices   = 0;
1553   b->rowvalues    = 0;
1554   b->getrowactive = PETSC_FALSE;
1555 
1556 /* #if defined(PETSC_HAVE_SUPERLUDIST)
1557 
1558   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1559   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1560 #endif */
1561 
1562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1563                                      "MatStoreValues_MPIAIJ",
1564                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1565   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1566                                      "MatRetrieveValues_MPIAIJ",
1567                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1568   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1569                                      "MatGetDiagonalBlock_MPIAIJ",
1570                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1571 
1572   PetscFunctionReturn(0);
1573 }
1574 EXTERN_C_END
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1578 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1579 {
1580   Mat        mat;
1581   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1582   int        ierr;
1583 
1584   PetscFunctionBegin;
1585   *newmat       = 0;
1586   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1587   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1588   a    = (Mat_MPIAIJ*)mat->data;
1589   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1590   mat->factor       = matin->factor;
1591   mat->assembled    = PETSC_TRUE;
1592   mat->insertmode   = NOT_SET_VALUES;
1593   mat->preallocated = PETSC_TRUE;
1594 
1595   a->rstart       = oldmat->rstart;
1596   a->rend         = oldmat->rend;
1597   a->cstart       = oldmat->cstart;
1598   a->cend         = oldmat->cend;
1599   a->size         = oldmat->size;
1600   a->rank         = oldmat->rank;
1601   a->donotstash   = oldmat->donotstash;
1602   a->roworiented  = oldmat->roworiented;
1603   a->rowindices   = 0;
1604   a->rowvalues    = 0;
1605   a->getrowactive = PETSC_FALSE;
1606 
1607   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1608   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1609   if (oldmat->colmap) {
1610 #if defined (PETSC_USE_CTABLE)
1611     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1612 #else
1613     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1614     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1615     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1616 #endif
1617   } else a->colmap = 0;
1618   if (oldmat->garray) {
1619     int len;
1620     len  = oldmat->B->n;
1621     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1622     PetscLogObjectMemory(mat,len*sizeof(int));
1623     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1624   } else a->garray = 0;
1625 
1626   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1627   PetscLogObjectParent(mat,a->lvec);
1628   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1629   PetscLogObjectParent(mat,a->Mvctx);
1630   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1631   PetscLogObjectParent(mat,a->A);
1632   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1633   PetscLogObjectParent(mat,a->B);
1634   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1635   *newmat = mat;
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 #include "petscsys.h"
1640 
1641 EXTERN_C_BEGIN
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatLoad_MPIAIJ"
1644 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1645 {
1646   Mat          A;
1647   PetscScalar  *vals,*svals;
1648   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1649   MPI_Status   status;
1650   int          i,nz,ierr,j,rstart,rend,fd;
1651   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1652   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1653   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1654 
1655   PetscFunctionBegin;
1656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1658   if (!rank) {
1659     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1660     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1661     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1662     if (header[3] < 0) {
1663       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1664     }
1665   }
1666 
1667   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1668   M = header[1]; N = header[2];
1669   /* determine ownership of all rows */
1670   m = M/size + ((M % size) > rank);
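       /* split the M rows as evenly as possible: the first M%size processes each get one extra row */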
1671   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1672   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1673   rowners[0] = 0;
1674   for (i=2; i<=size; i++) {
1675     rowners[i] += rowners[i-1];
1676   }
1677   rstart = rowners[rank];
1678   rend   = rowners[rank+1];
1679 
1680   /* distribute row lengths to all processors */
1681   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1682   offlens = ourlens + (rend-rstart);
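       /* ourlens[] and offlens[] share one allocation: ourlens holds each local row's total
          length, offlens will later hold its number of off-diagonal entries */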
1683   if (!rank) {
1684     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1685     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1686     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1687     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1688     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1689     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1690   } else {
1691     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1692   }
1693 
1694   if (!rank) {
1695     /* calculate the number of nonzeros on each processor */
1696     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1697     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1698     for (i=0; i<size; i++) {
1699       for (j=rowners[i]; j< rowners[i+1]; j++) {
1700         procsnz[i] += rowlengths[j];
1701       }
1702     }
1703     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1704 
1705     /* determine max buffer needed and allocate it */
1706     maxnz = 0;
1707     for (i=0; i<size; i++) {
1708       maxnz = PetscMax(maxnz,procsnz[i]);
1709     }
1710     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1711 
1712     /* read in my part of the matrix column indices  */
1713     nz   = procsnz[0];
1714     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1715     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1716 
1717     /* read in everyone else's column indices and ship them off */
1718     for (i=1; i<size; i++) {
1719       nz   = procsnz[i];
1720       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1721       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1722     }
1723     ierr = PetscFree(cols);CHKERRQ(ierr);
1724   } else {
1725     /* determine buffer space needed for message */
1726     nz = 0;
1727     for (i=0; i<m; i++) {
1728       nz += ourlens[i];
1729     }
1730     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1731 
1732     /* receive message of column indices */
1733     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1734     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1735     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1736   }
1737 
1738   /* determine column ownership if matrix is not square */
1739   if (N != M) {
1740     n      = N/size + ((N % size) > rank);
1741     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1742     cstart = cend - n;
1743   } else {
1744     cstart = rstart;
1745     cend   = rend;
1746     n      = cend - cstart;
1747   }
1748 
1749   /* loop over local rows, determining number of off diagonal entries */
1750   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1751   jj = 0;
1752   for (i=0; i<m; i++) {
1753     for (j=0; j<ourlens[i]; j++) {
1754       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1755       jj++;
1756     }
1757   }
1758 
1759   /* create our matrix */
1760   for (i=0; i<m; i++) {
1761     ourlens[i] -= offlens[i];
1762   }
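       /* ourlens[] temporarily holds only the diagonal-block counts, so ourlens/offlens give
          an exact preallocation; the full row lengths are restored right after creation */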
1763   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1764   A = *newmat;
1765   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1766   for (i=0; i<m; i++) {
1767     ourlens[i] += offlens[i];
1768   }
1769 
1770   if (!rank) {
1771     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1772 
1773     /* read in my part of the matrix numerical values  */
1774     nz   = procsnz[0];
1775     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1776 
1777     /* insert into matrix */
1778     jj      = rstart;
1779     smycols = mycols;
1780     svals   = vals;
1781     for (i=0; i<m; i++) {
1782       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1783       smycols += ourlens[i];
1784       svals   += ourlens[i];
1785       jj++;
1786     }
1787 
1788     /* read in the other processors' values and ship them out */
1789     for (i=1; i<size; i++) {
1790       nz   = procsnz[i];
1791       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1792       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1793     }
1794     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1795   } else {
1796     /* receive numeric values */
1797     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1798 
1799     /* receive message of values */
1800     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1801     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1802     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1803 
1804     /* insert into matrix */
1805     jj      = rstart;
1806     smycols = mycols;
1807     svals   = vals;
1808     for (i=0; i<m; i++) {
1809       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1810       smycols += ourlens[i];
1811       svals   += ourlens[i];
1812       jj++;
1813     }
1814   }
1815   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1816   ierr = PetscFree(vals);CHKERRQ(ierr);
1817   ierr = PetscFree(mycols);CHKERRQ(ierr);
1818   ierr = PetscFree(rowners);CHKERRQ(ierr);
1819 
1820   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1821   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 EXTERN_C_END
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1828 /*
1829     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
1830   on each process, and then the end result obtained by concatenating the
1831   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1832 */
1833 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1834 {
1835   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1836   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1837   Mat          *local,M,Mreuse;
1838   PetscScalar  *vwork,*aa;
1839   MPI_Comm     comm = mat->comm;
1840   Mat_SeqAIJ   *aij;
1841 
1842 
1843   PetscFunctionBegin;
1844   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1845   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1846 
1847   if (call ==  MAT_REUSE_MATRIX) {
1848     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1849     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1850     local = &Mreuse;
1851     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1852   } else {
1853     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1854     Mreuse = *local;
1855     ierr   = PetscFree(local);CHKERRQ(ierr);
1856   }
1857 
1858   /*
1859       m - number of local rows
1860       n - number of columns (same on all processors)
1861       rstart - first row in new global matrix generated
1862   */
1863   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1864   if (call == MAT_INITIAL_MATRIX) {
1865     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1866     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1867     ii  = aij->i;
1868     jj  = aij->j;
1869 
1870     /*
1871         Determine the number of non-zeros in the diagonal and off-diagonal
1872         portions of the matrix in order to do correct preallocation
1873     */
1874 
1875     /* first get start and end of "diagonal" columns */
1876     if (csize == PETSC_DECIDE) {
1877       nlocal = n/size + ((n % size) > rank);
1878     } else {
1879       nlocal = csize;
1880     }
1881     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1882     rstart = rend - nlocal;
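         /* MPI_Scan returns the inclusive prefix sum of nlocal, so rend is one past this
            process's last "diagonal" column and rstart is its first */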
1883     if (rank == size - 1 && rend != n) {
1884       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1885     }
1886 
1887     /* next, compute all the lengths */
1888     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1889     olens = dlens + m;
1890     for (i=0; i<m; i++) {
1891       jend = ii[i+1] - ii[i];
1892       olen = 0;
1893       dlen = 0;
1894       for (j=0; j<jend; j++) {
1895         if (*jj < rstart || *jj >= rend) olen++;
1896         else dlen++;
1897         jj++;
1898       }
1899       olens[i] = olen;
1900       dlens[i] = dlen;
1901     }
1902     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1903     ierr = PetscFree(dlens);CHKERRQ(ierr);
1904   } else {
1905     int ml,nl;
1906 
1907     M = *newmat;
1908     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1909     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1910     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1911     /*
1912          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1913        rather than the slower MatSetValues().
1914     */
1915     M->was_assembled = PETSC_TRUE;
1916     M->assembled     = PETSC_FALSE;
1917   }
1918   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1919   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1920   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1921   ii  = aij->i;
1922   jj  = aij->j;
1923   aa  = aij->a;
1924   for (i=0; i<m; i++) {
1925     row   = rstart + i;
1926     nz    = ii[i+1] - ii[i];
1927     cwork = jj;     jj += nz;
1928     vwork = aa;     aa += nz;
1929     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1930   }
1931 
1932   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1933   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1934   *newmat = M;
1935 
1936   /* save the submatrix used on this processor for the next request */
1937   if (call ==  MAT_INITIAL_MATRIX) {
1938     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1939     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1940   }
1941 
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1947 /*@C
1948    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1949    (the default parallel PETSc format).  For good matrix assembly performance
1950    the user should preallocate the matrix storage by setting the parameters
1951    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1952    performance can be increased by more than a factor of 50.
1953 
1954    Collective on MPI_Comm
1955 
1956    Input Parameters:
1957 +  A - the matrix
1958 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1959            (same value is used for all local rows)
1960 .  d_nnz - array containing the number of nonzeros in the various rows of the
1961            DIAGONAL portion of the local submatrix (possibly different for each row)
1962            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1963            The size of this array is equal to the number of local rows, i.e. 'm'.
1964            You must leave room for the diagonal entry even if it is zero.
1965 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1966            submatrix (same value is used for all local rows).
1967 -  o_nnz - array containing the number of nonzeros in the various rows of the
1968            OFF-DIAGONAL portion of the local submatrix (possibly different for
1969            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1970            structure. The size of this array is equal to the number
1971            of local rows, i.e. 'm'.
1972 
1973    The AIJ format (also called the Yale sparse matrix format or
1974    compressed row storage) is fully compatible with standard Fortran 77
1975    storage.  That is, the stored row and column indices can begin at
1976    either one (as in Fortran) or zero.  See the users manual for details.
1977 
1978    The user MUST specify either the local or global matrix dimensions
1979    (possibly both).
1980 
1981    The parallel matrix is partitioned such that the first m0 rows belong to
1982    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1983    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
1984 
1985    The DIAGONAL portion of the local submatrix of a processor can be defined
1986    as the submatrix which is obtained by extracting the part corresponding
1987    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1988    first row that belongs to the processor, and r2 is the last row belonging
1989    to this processor. This is a square mxm matrix. The remaining portion
1990    of the local (mxN) submatrix constitutes the OFF-DIAGONAL portion.
1991 
1992    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1993 
1994    By default, this format uses inodes (identical nodes) when possible.
1995    We search for consecutive rows with the same nonzero structure, thereby
1996    reusing matrix information to achieve increased efficiency.
1997 
1998    Options Database Keys:
1999 +  -mat_aij_no_inode  - Do not use inodes
2000 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2001 -  -mat_aij_oneindex - Internally use indexing starting at 1
2002         rather than 0.  Note that when calling MatSetValues(),
2003         the user still MUST index entries starting at 0!
2004 
2005    Example usage:
2006 
2007    Consider the following 8x8 matrix with 34 non-zero values that is
2008    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2009    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2010    as follows:
2011 
2012 .vb
2013             1  2  0  |  0  3  0  |  0  4
2014     Proc0   0  5  6  |  7  0  0  |  8  0
2015             9  0 10  | 11  0  0  | 12  0
2016     -------------------------------------
2017            13  0 14  | 15 16 17  |  0  0
2018     Proc1   0 18  0  | 19 20 21  |  0  0
2019             0  0  0  | 22 23  0  | 24  0
2020     -------------------------------------
2021     Proc2  25 26 27  |  0  0 28  | 29  0
2022            30  0  0  | 31 32 33  |  0 34
2023 .ve
2024 
2025    This can be represented as a collection of submatrices as:
2026 
2027 .vb
2028       A B C
2029       D E F
2030       G H I
2031 .ve
2032 
2033    Here the submatrices A,B,C are owned by proc0, D,E,F are
2034    owned by proc1, and G,H,I are owned by proc2.
2035 
2036    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2037    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2038    The 'M','N' parameters are 8,8, and have the same values on all procs.
2039 
2040    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2041    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2042    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2043    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2044    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2045    matrix and [DF] as another SeqAIJ matrix.
2046 
2047    When d_nz, o_nz parameters are specified, d_nz storage elements are
2048    allocated for every row of the local diagonal submatrix, and o_nz
2049    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2050    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2051    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2052    In this case, the values of d_nz,o_nz are:
2053 .vb
2054      proc0 : d_nz = 2, o_nz = 2
2055      proc1 : d_nz = 3, o_nz = 2
2056      proc2 : d_nz = 1, o_nz = 4
2057 .ve
2058    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2059    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2060    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2061    34 values.
2062 
2063    When d_nnz, o_nnz parameters are specified, the storage is specified
2064    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2065    In the above case the values for d_nnz,o_nnz are:
2066 .vb
2067      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2068      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2069      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2070 .ve
2071    Here the space allocated is the sum of all the above values, i.e. 34, and
2072    hence pre-allocation is perfect.
2073 
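        As a minimal sketch, proc1 in the example above might set up its
        preallocation as follows (comm is assumed to be the shared 3-process
        MPI communicator; each process passes its own local sizes and nnz arrays):
     .vb
          Mat A;
          int ierr,d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
          ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
          ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
          ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve
        Because d_nnz and o_nnz are given here, the scalar d_nz and o_nz
        arguments (passed as 0) are ignored.
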
2074    Level: intermediate
2075 
2076 .keywords: matrix, aij, compressed row, sparse, parallel
2077 
2078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2079 @*/
2080 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2081 {
2082   Mat_MPIAIJ   *b;
2083   int          ierr,i;
2084   PetscTruth   flg2;
2085 
2086   PetscFunctionBegin;
2087   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2088   if (!flg2) PetscFunctionReturn(0);
2089   B->preallocated = PETSC_TRUE;
2090   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2091   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2092   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2093   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2094   if (d_nnz) {
2095     for (i=0; i<B->m; i++) {
2096       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2097     }
2098   }
2099   if (o_nnz) {
2100     for (i=0; i<B->m; i++) {
2101       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2102     }
2103   }
2104   b = (Mat_MPIAIJ*)B->data;
2105 
2106   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2107   PetscLogObjectParent(B,b->A);
2108   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2109   PetscLogObjectParent(B,b->B);
2110 
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 #undef __FUNCT__
2115 #define __FUNCT__ "MatCreateMPIAIJ"
2116 /*@C
2117    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2118    (the default parallel PETSc format).  For good matrix assembly performance
2119    the user should preallocate the matrix storage by setting the parameters
2120    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2121    performance can be increased by more than a factor of 50.
2122 
2123    Collective on MPI_Comm
2124 
2125    Input Parameters:
2126 +  comm - MPI communicator
2127 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2128            This value should be the same as the local size used in creating the
2129            y vector for the matrix-vector product y = Ax.
2130 .  n - This value should be the same as the local size used in creating the
2131        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2132        calculated if N is given) For square matrices n is almost always m.
2133 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2134 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2135 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2136            (same value is used for all local rows)
2137 .  d_nnz - array containing the number of nonzeros in the various rows of the
2138            DIAGONAL portion of the local submatrix (possibly different for each row)
2139            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2140            The size of this array is equal to the number of local rows, i.e. 'm'.
2141            You must leave room for the diagonal entry even if it is zero.
2142 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2143            submatrix (same value is used for all local rows).
2144 -  o_nnz - array containing the number of nonzeros in the various rows of the
2145            OFF-DIAGONAL portion of the local submatrix (possibly different for
2146            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2147            structure. The size of this array is equal to the number
2148            of local rows, i.e. 'm'.
2149 
2150    Output Parameter:
2151 .  A - the matrix
2152 
2153    Notes:
2154    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2155    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2156    storage requirements for this matrix.
2157 
2158    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2159    processor then it must be used on all processors that share the object for
2160    that argument.
2161 
2162    The AIJ format (also called the Yale sparse matrix format or
2163    compressed row storage) is fully compatible with standard Fortran 77
2164    storage.  That is, the stored row and column indices can begin at
2165    either one (as in Fortran) or zero.  See the users manual for details.
2166 
2167    The user MUST specify either the local or global matrix dimensions
2168    (possibly both).
2169 
2170    The parallel matrix is partitioned such that the first m0 rows belong to
2171    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2172    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2173 
2174    The DIAGONAL portion of the local submatrix of a processor can be defined
2175    as the submatrix which is obtained by extracting the part corresponding
2176    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2177    first row that belongs to the processor, and r2 is the last row belonging
2178    to this processor. This is a square mxm matrix. The remaining portion
2179    of the local (mxN) submatrix constitutes the OFF-DIAGONAL portion.
2180 
2181    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2182 
2183    By default, this format uses inodes (identical nodes) when possible.
2184    We search for consecutive rows with the same nonzero structure, thereby
2185    reusing matrix information to achieve increased efficiency.
2186 
2187    Options Database Keys:
2188 +  -mat_aij_no_inode  - Do not use inodes
2189 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2190 -  -mat_aij_oneindex - Internally use indexing starting at 1
2191         rather than 0.  Note that when calling MatSetValues(),
2192         the user still MUST index entries starting at 0!
2193 
2194 
2195    Example usage:
2196 
2197    Consider the following 8x8 matrix with 34 non-zero values that is
2198    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2199    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2200    as follows:
2201 
2202 .vb
2203             1  2  0  |  0  3  0  |  0  4
2204     Proc0   0  5  6  |  7  0  0  |  8  0
2205             9  0 10  | 11  0  0  | 12  0
2206     -------------------------------------
2207            13  0 14  | 15 16 17  |  0  0
2208     Proc1   0 18  0  | 19 20 21  |  0  0
2209             0  0  0  | 22 23  0  | 24  0
2210     -------------------------------------
2211     Proc2  25 26 27  |  0  0 28  | 29  0
2212            30  0  0  | 31 32 33  |  0 34
2213 .ve
2214 
2215    This can be represented as a collection of submatrices as:
2216 
2217 .vb
2218       A B C
2219       D E F
2220       G H I
2221 .ve
2222 
2223    Here the submatrices A,B,C are owned by proc0, D,E,F are
2224    owned by proc1, and G,H,I are owned by proc2.
2225 
2226    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2227    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2228    The 'M','N' parameters are 8,8, and have the same values on all procs.
2229 
2230    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2231    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2232    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2233    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2234    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2235    matrix and [DF] as another SeqAIJ matrix.
2236 
2237    When d_nz, o_nz parameters are specified, d_nz storage elements are
2238    allocated for every row of the local diagonal submatrix, and o_nz
2239    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2240    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2241    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2242    In this case, the values of d_nz,o_nz are:
2243 .vb
2244      proc0 : d_nz = 2, o_nz = 2
2245      proc1 : d_nz = 3, o_nz = 2
2246      proc2 : d_nz = 1, o_nz = 4
2247 .ve
2248    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2249    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2250    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2251    34 values.
2252 
2253    When d_nnz, o_nnz parameters are specified, the storage is specified
2254    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2255    In the above case the values for d_nnz,o_nnz are:
2256 .vb
2257      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2258      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2259      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2260 .ve
2261    Here the space allocated is the sum of all the above values, i.e. 34, and
2262    hence pre-allocation is perfect.
2263 
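        As a minimal sketch, proc1 in the example above might create the matrix
        as follows (comm is assumed to be the shared 3-process MPI communicator;
        each process passes its own local sizes and nnz arrays):
     .vb
          Mat A;
          int ierr,d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
          ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     .ve
        Because d_nnz and o_nnz are given here, the scalar d_nz and o_nz
        arguments (passed as 0) are ignored.
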
2264    Level: intermediate
2265 
2266 .keywords: matrix, aij, compressed row, sparse, parallel
2267 
2268 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2269 @*/
2270 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2271 {
2272   int ierr,size;
2273 
2274   PetscFunctionBegin;
2275   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2276   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2277   if (size > 1) {
2278     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2279     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2280   } else {
2281     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2282     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2283   }
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2289 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2290 {
2291   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2292   PetscFunctionBegin;
2293   *Ad     = a->A;
2294   *Ao     = a->B;
2295   *colmap = a->garray;
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2301 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2302 {
2303   int        ierr;
2304   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2305 
2306   PetscFunctionBegin;
2307   if (coloring->ctype == IS_COLORING_LOCAL) {
2308     int        *allcolors,*colors,i;
2309     ISColoring ocoloring;
2310 
2311     /* set coloring for diagonal portion */
2312     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2313 
2314     /* set coloring for off-diagonal portion */
2315     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2316     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2317     for (i=0; i<a->B->n; i++) {
2318       colors[i] = allcolors[a->garray[i]];
2319     }
2320     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2321     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2322     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2323     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2324   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2325     int        *colors,i,*larray;
2326     ISColoring ocoloring;
2327 
2328     /* set coloring for diagonal portion */
2329     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2330     for (i=0; i<a->A->n; i++) {
2331       larray[i] = i + a->cstart;
2332     }
2333     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2334     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2335     for (i=0; i<a->A->n; i++) {
2336       colors[i] = coloring->colors[larray[i]];
2337     }
2338     ierr = PetscFree(larray);CHKERRQ(ierr);
2339     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2340     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2341     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2342 
2343     /* set coloring for off-diagonal portion */
2344     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2345     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2346     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2347     for (i=0; i<a->B->n; i++) {
2348       colors[i] = coloring->colors[larray[i]];
2349     }
2350     ierr = PetscFree(larray);CHKERRQ(ierr);
2351     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2352     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2353     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2354   } else {
2355     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2356   }
2357 
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2363 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2364 {
2365   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2366   int        ierr;
2367 
2368   PetscFunctionBegin;
2369   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2370   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2376 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2377 {
2378   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2379   int        ierr;
2380 
2381   PetscFunctionBegin;
2382   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2383   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386