1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
18 at a slightly higher hash-table cost; without it the mapping is not scalable
19 (each processor holds an order-N integer array) but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
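/*
   Illustrative sketch (not part of the PETSc API): the colmap built above maps a
   global column number to its local position in the off-diagonal block.  Entries
   are stored as local+1 so that a stored 0 (or a failed table lookup) means "this
   global column has no local counterpart".  The hypothetical helper below shows
   the array-based convention; with PETSC_USE_CTABLE the same -1 adjustment follows
   the PetscTableFind() call, as in MatSetValues_MPIAIJ() below.
*/
static int SketchGlobalToLocalColumn(const int *colmap,int gcol)
{
  return colmap[gcol] - 1; /* >= 0: local column in B;  -1: not present locally */
}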
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
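/*
   Illustrative sketch (hypothetical helper, not used by the code below): the two
   macros above insert a value into one sorted CSR row.  A binary search narrows
   the window, a linear scan finds the exact slot, an existing entry is added to
   or overwritten, and otherwise later entries are shifted up to make room.  The
   CHUNKSIZE reallocation path is omitted here; this assumes the row still has room.
*/
static int SketchInsertIntoSortedRow(int *cols,double *vals,int nrow,int col,double value,int add)
{
  int low = 0,high = nrow,t,i;
  while (high - low > 5) {               /* binary search down to a short window */
    t = (low + high)/2;
    if (cols[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;            /* insertion point found */
    if (cols[i] == col) {                /* entry already exists */
      if (add) vals[i] += value; else vals[i] = value;
      return nrow;
    }
  }
  for (t=nrow-1; t>=i; t--) {            /* shift later entries up one slot */
    cols[t+1] = cols[t];
    vals[t+1] = vals[t];
  }
  cols[i] = col;
  vals[i] = value;
  return nrow + 1;                       /* new row length */
}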
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
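/*
   Illustrative sketch (hypothetical, not PETSc code): the routing decision made by
   MatSetValues_MPIAIJ() above for each (row,column) pair.  Rows owned by another
   processor are stashed and communicated at assembly time; local rows are split
   between the diagonal block A and the off-diagonal block B by column ownership.
*/
typedef enum {SKETCH_STASH,SKETCH_DIAG_BLOCK,SKETCH_OFFDIAG_BLOCK} SketchDest;
static SketchDest SketchRouteEntry(int row,int col,int rstart,int rend,int cstart,int cend)
{
  if (row < rstart || row >= rend)  return SKETCH_STASH;         /* off-processor row */
  if (col >= cstart && col < cend)  return SKETCH_DIAG_BLOCK;    /* goes into aij->A  */
  return SKETCH_OFFDIAG_BLOCK;                                   /* goes into aij->B  */
}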
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive values belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled; if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if the nonzero structure of the submatrix B cannot change then we know that
384      no processor disassembled, thus we can skip this step
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
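/*
   Illustrative sketch (hypothetical): the inner loop in MatAssemblyEnd_MPIAIJ() above
   groups consecutive stash entries that share a row index, so each run can be handed
   to MatSetValues_MPIAIJ() with a single call instead of one call per entry.
*/
static int SketchRunLength(const int *row,int i,int n)
{
  int j = i;
  while (j < n && row[j] == row[i]) j++;  /* advance while the row index repeats */
  return j - i;                           /* length of the run starting at i */
}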
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for the values destined for
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry,
533      which would allocate new memory, incur many mallocs, and slow things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
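/*
   Illustrative sketch (hypothetical, plain MPI): the single reduction with
   PetscMaxSum_Op above tells every rank (a) how many messages it will receive,
   i.e. the sum over ranks of the 0/1 "I send to you" flags, and (b) an upper
   bound on their length, i.e. the maximum count destined to it.  The same two
   numbers can be obtained with two ordinary reductions, given caller-provided
   work arrays of length size:
*/
static void SketchCountIncoming(int *counts,int *flags,int *workmax,int *worksum,
                                int size,int rank,MPI_Comm comm,int *nrecvs,int *nmax)
{
  MPI_Allreduce(counts,workmax,size,MPI_INT,MPI_MAX,comm); /* longest message sent to each rank */
  MPI_Allreduce(flags,worksum,size,MPI_INT,MPI_SUM,comm);  /* number of ranks sending to each rank */
  *nmax   = workmax[rank];
  *nrecvs = worksum[rank];
}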
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
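/*
   Illustrative sketch (hypothetical, plain CSR arithmetic): the parallel product above
   is y = A_diag*x_local + B_off*x_ghost, where B_off stores compressed local column
   indices into the gathered ghost values (a->lvec).  Assuming 0-based CSR indices,
   the per-row computation is schematically:
*/
static void SketchLocalMatMult(int m,const int *ai,const int *aj,const double *aa,
                               const int *bi,const int *bj,const double *ba,
                               const double *xloc,const double *xghost,double *y)
{
  int    i,k;
  double sum;
  for (i=0; i<m; i++) {
    sum = 0.0;
    for (k=ai[i]; k<ai[i+1]; k++) sum += aa[k]*xloc[aj[k]];    /* diagonal block */
    for (k=bi[i]; k<bi[i+1]; k++) sum += ba[k]*xghost[bj[k]];  /* off-diagonal block */
    y[i] = sum;
  }
}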
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted into yy until the VecScatterEnd() below, which holds for the */
621   /* current implementation but may not always be true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted into zz until the VecScatterEnd() below, which holds for the */
642   /* current implementation but may not always be true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     PetscScalar *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Every processor has to participate in drawing the matrix since the graphics waits are
798        synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatRelax_MPIAIJ"
835 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
836 {
837   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
838   int          ierr;
839   Vec          bb1;
840   PetscScalar  mone=-1.0;
841 
842   PetscFunctionBegin;
843 
844   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
845 
846   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
847     if (flag & SOR_ZERO_INITIAL_GUESS) {
848       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
849       its--;
850     }
851 
852     while (its--) {
853       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
854       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
855 
856       /* update rhs: bb1 = bb - B*x */
857       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
858       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
859 
860       /* local sweep */
861       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
862       CHKERRQ(ierr);
863     }
864   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
865     if (flag & SOR_ZERO_INITIAL_GUESS) {
866       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
867       its--;
868     }
869     while (its--) {
870       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
871       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
872 
873       /* update rhs: bb1 = bb - B*x */
874       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
875       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
876 
877       /* local sweep */
878       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
879       CHKERRQ(ierr);
880     }
881   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
882     if (flag & SOR_ZERO_INITIAL_GUESS) {
883       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
884       its--;
885     }
886     while (its--) {
887       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
888       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
889 
890       /* update rhs: bb1 = bb - B*x */
891       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
892       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
893 
894       /* local sweep */
895       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
896       CHKERRQ(ierr);
897     }
898   } else {
899     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
900   }
901 
902   ierr = VecDestroy(bb1);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "MatGetInfo_MPIAIJ"
908 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
909 {
910   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
911   Mat        A = mat->A,B = mat->B;
912   int        ierr;
913   PetscReal  isend[5],irecv[5];
914 
915   PetscFunctionBegin;
916   info->block_size     = 1.0;
917   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
918   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
919   isend[3] = info->memory;  isend[4] = info->mallocs;
920   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
921   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
922   isend[3] += info->memory;  isend[4] += info->mallocs;
923   if (flag == MAT_LOCAL) {
924     info->nz_used      = isend[0];
925     info->nz_allocated = isend[1];
926     info->nz_unneeded  = isend[2];
927     info->memory       = isend[3];
928     info->mallocs      = isend[4];
929   } else if (flag == MAT_GLOBAL_MAX) {
930     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
931     info->nz_used      = irecv[0];
932     info->nz_allocated = irecv[1];
933     info->nz_unneeded  = irecv[2];
934     info->memory       = irecv[3];
935     info->mallocs      = irecv[4];
936   } else if (flag == MAT_GLOBAL_SUM) {
937     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
938     info->nz_used      = irecv[0];
939     info->nz_allocated = irecv[1];
940     info->nz_unneeded  = irecv[2];
941     info->memory       = irecv[3];
942     info->mallocs      = irecv[4];
943   }
944   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
945   info->fill_ratio_needed = 0;
946   info->factor_mallocs    = 0;
947   info->rows_global       = (double)matin->M;
948   info->columns_global    = (double)matin->N;
949   info->rows_local        = (double)matin->m;
950   info->columns_local     = (double)matin->N;
951 
952   PetscFunctionReturn(0);
953 }
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "MatSetOption_MPIAIJ"
957 int MatSetOption_MPIAIJ(Mat A,MatOption op)
958 {
959   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
960   int        ierr;
961 
962   PetscFunctionBegin;
963   switch (op) {
964   case MAT_NO_NEW_NONZERO_LOCATIONS:
965   case MAT_YES_NEW_NONZERO_LOCATIONS:
966   case MAT_COLUMNS_UNSORTED:
967   case MAT_COLUMNS_SORTED:
968   case MAT_NEW_NONZERO_ALLOCATION_ERR:
969   case MAT_KEEP_ZEROED_ROWS:
970   case MAT_NEW_NONZERO_LOCATION_ERR:
971   case MAT_USE_INODES:
972   case MAT_DO_NOT_USE_INODES:
973   case MAT_IGNORE_ZERO_ENTRIES:
974     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
975     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
976     break;
977   case MAT_ROW_ORIENTED:
978     a->roworiented = PETSC_TRUE;
979     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
980     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
981     break;
982   case MAT_ROWS_SORTED:
983   case MAT_ROWS_UNSORTED:
984   case MAT_YES_NEW_DIAGONALS:
985   case MAT_USE_SINGLE_PRECISION_SOLVES:
986     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
987     break;
988   case MAT_COLUMN_ORIENTED:
989     a->roworiented = PETSC_FALSE;
990     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
991     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
992     break;
993   case MAT_IGNORE_OFF_PROC_ENTRIES:
994     a->donotstash = PETSC_TRUE;
995     break;
996   case MAT_NO_NEW_DIAGONALS:
997     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
998     break;
999   default:
1000     SETERRQ(PETSC_ERR_SUP,"unknown option");
1001     break;
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatGetRow_MPIAIJ"
1008 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1009 {
1010   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1011   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1012   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1013   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1014   int          *cmap,*idx_p;
1015 
1016   PetscFunctionBegin;
1017   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1018   mat->getrowactive = PETSC_TRUE;
1019 
1020   if (!mat->rowvalues && (idx || v)) {
1021     /*
1022         allocate enough space to hold information from the longest row.
1023     */
1024     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1025     int     max = 1,tmp;
1026     for (i=0; i<matin->m; i++) {
1027       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1028       if (max < tmp) { max = tmp; }
1029     }
1030     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1031     mat->rowindices = (int*)(mat->rowvalues + max);
1032   }
1033 
1034   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1035   lrow = row - rstart;
1036 
1037   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1038   if (!v)   {pvA = 0; pvB = 0;}
1039   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1040   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1041   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1042   nztot = nzA + nzB;
1043 
1044   cmap  = mat->garray;
1045   if (v  || idx) {
1046     if (nztot) {
1047       /* Sort by increasing column numbers, assuming A and B already sorted */
1048       int imark = -1;
1049       if (v) {
1050         *v = v_p = mat->rowvalues;
1051         for (i=0; i<nzB; i++) {
1052           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1053           else break;
1054         }
1055         imark = i;
1056         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1057         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1058       }
1059       if (idx) {
1060         *idx = idx_p = mat->rowindices;
1061         if (imark > -1) {
1062           for (i=0; i<imark; i++) {
1063             idx_p[i] = cmap[cworkB[i]];
1064           }
1065         } else {
1066           for (i=0; i<nzB; i++) {
1067             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1068             else break;
1069           }
1070           imark = i;
1071         }
1072         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1073         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1074       }
1075     } else {
1076       if (idx) *idx = 0;
1077       if (v)   *v   = 0;
1078     }
1079   }
1080   *nz = nztot;
1081   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1082   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
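/*
   Illustrative sketch (hypothetical): MatGetRow_MPIAIJ() above merges one row of the
   diagonal block (local columns shifted by cstart) with one row of the off-diagonal
   block (compressed columns translated through garray).  Because garray is increasing,
   the B entries split into a prefix of global columns below cstart followed by a suffix
   above the diagonal range, so the merged row is: B-prefix, all of A, B-suffix.
*/
static int SketchMergeRow(int nzA,const int *colsA,int cstart,
                          int nzB,const int *colsB,const int *garray,int *merged)
{
  int i,imark = 0,k = 0;
  while (imark < nzB && garray[colsB[imark]] < cstart) {    /* B columns left of the diagonal block */
    merged[k++] = garray[colsB[imark]]; imark++;
  }
  for (i=0; i<nzA; i++) merged[k++] = cstart + colsA[i];    /* diagonal-block columns */
  for (i=imark; i<nzB; i++) merged[k++] = garray[colsB[i]]; /* B columns right of the diagonal block */
  return k;                                                 /* total nonzeros in the row */
}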
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1088 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1089 {
1090   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1091 
1092   PetscFunctionBegin;
1093   if (aij->getrowactive == PETSC_FALSE) {
1094     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1095   }
1096   aij->getrowactive = PETSC_FALSE;
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatNorm_MPIAIJ"
1102 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1103 {
1104   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1105   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1106   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1107   PetscReal    sum = 0.0;
1108   PetscScalar  *v;
1109 
1110   PetscFunctionBegin;
1111   if (aij->size == 1) {
1112     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1113   } else {
1114     if (type == NORM_FROBENIUS) {
1115       v = amat->a;
1116       for (i=0; i<amat->nz; i++) {
1117 #if defined(PETSC_USE_COMPLEX)
1118         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1119 #else
1120         sum += (*v)*(*v); v++;
1121 #endif
1122       }
1123       v = bmat->a;
1124       for (i=0; i<bmat->nz; i++) {
1125 #if defined(PETSC_USE_COMPLEX)
1126         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1127 #else
1128         sum += (*v)*(*v); v++;
1129 #endif
1130       }
1131       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1132       *norm = sqrt(*norm);
1133     } else if (type == NORM_1) { /* max column norm */
1134       PetscReal *tmp,*tmp2;
1135       int    *jj,*garray = aij->garray;
1136       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1137       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1138       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1139       *norm = 0.0;
1140       v = amat->a; jj = amat->j;
1141       for (j=0; j<amat->nz; j++) {
1142         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1143       }
1144       v = bmat->a; jj = bmat->j;
1145       for (j=0; j<bmat->nz; j++) {
1146         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1147       }
1148       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1149       for (j=0; j<mat->N; j++) {
1150         if (tmp2[j] > *norm) *norm = tmp2[j];
1151       }
1152       ierr = PetscFree(tmp);CHKERRQ(ierr);
1153       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1154     } else if (type == NORM_INFINITY) { /* max row norm */
1155       PetscReal ntemp = 0.0;
1156       for (j=0; j<aij->A->m; j++) {
1157         v = amat->a + amat->i[j] + shift;
1158         sum = 0.0;
1159         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1160           sum += PetscAbsScalar(*v); v++;
1161         }
1162         v = bmat->a + bmat->i[j] + shift;
1163         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1164           sum += PetscAbsScalar(*v); v++;
1165         }
1166         if (sum > ntemp) ntemp = sum;
1167       }
1168       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1169     } else {
1170       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1171     }
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
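/*
   Illustrative sketch (hypothetical): for the infinity norm above, each processor forms
   the absolute row sums of its complete rows (diagonal plus off-diagonal block) and an
   MPI_MAX reduction takes the global maximum; for the one norm the per-column absolute
   sums are accumulated into a length-N array, reduced with MPI_SUM, and then the maximum
   is taken.  The local row-sum step, assuming 0-based CSR indices and real entries:
*/
static double SketchLocalMaxRowSum(int m,const int *ai,const double *aa,
                                   const int *bi,const double *ba)
{
  int    i,k;
  double sum,maxsum = 0.0;
  for (i=0; i<m; i++) {
    sum = 0.0;
    for (k=ai[i]; k<ai[i+1]; k++) sum += (aa[k] >= 0.0 ? aa[k] : -aa[k]); /* |a_ik| */
    for (k=bi[i]; k<bi[i+1]; k++) sum += (ba[k] >= 0.0 ? ba[k] : -ba[k]); /* |b_ik| */
    if (sum > maxsum) maxsum = sum;
  }
  return maxsum; /* reduce with MPI_MAX across ranks to obtain ||A||_inf */
}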
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatTranspose_MPIAIJ"
1178 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1179 {
1180   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1181   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1182   int          ierr,shift = Aloc->indexshift;
1183   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1184   Mat          B;
1185   PetscScalar  *array;
1186 
1187   PetscFunctionBegin;
1188   if (!matout && M != N) {
1189     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1190   }
1191 
1192   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1193 
1194   /* copy over the A part */
1195   Aloc = (Mat_SeqAIJ*)a->A->data;
1196   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1197   row = a->rstart;
1198   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1199   for (i=0; i<m; i++) {
1200     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1201     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1202   }
1203   aj = Aloc->j;
1204   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1205 
1206   /* copy over the B part */
1207   Aloc = (Mat_SeqAIJ*)a->B->data;
1208   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1209   row  = a->rstart;
1210   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1211   ct   = cols;
1212   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1213   for (i=0; i<m; i++) {
1214     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1215     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1216   }
1217   ierr = PetscFree(ct);CHKERRQ(ierr);
1218   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1219   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1220   if (matout) {
1221     *matout = B;
1222   } else {
1223     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1230 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1231 {
1232   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1233   Mat        a = aij->A,b = aij->B;
1234   int        ierr,s1,s2,s3;
1235 
1236   PetscFunctionBegin;
1237   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1238   if (rr) {
1239     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1240     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1241     /* Overlap communication with computation. */
1242     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1243   }
1244   if (ll) {
1245     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1246     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1247     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1248   }
1249   /* scale  the diagonal block */
1250   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1251 
1252   if (rr) {
1253     /* Do a scatter end and then right scale the off-diagonal block */
1254     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1255     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1256   }
1257 
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1264 int MatPrintHelp_MPIAIJ(Mat A)
1265 {
1266   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1267   int        ierr;
1268 
1269   PetscFunctionBegin;
1270   if (!a->rank) {
1271     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1278 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1279 {
1280   PetscFunctionBegin;
1281   *bs = 1;
1282   PetscFunctionReturn(0);
1283 }
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1286 int MatSetUnfactored_MPIAIJ(Mat A)
1287 {
1288   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1289   int        ierr;
1290 
1291   PetscFunctionBegin;
1292   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatEqual_MPIAIJ"
1298 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1299 {
1300   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1301   Mat        a,b,c,d;
1302   PetscTruth flg;
1303   int        ierr;
1304 
1305   PetscFunctionBegin;
1306   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1307   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1308   a = matA->A; b = matA->B;
1309   c = matB->A; d = matB->B;
1310 
1311   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1312   if (flg == PETSC_TRUE) {
1313     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1314   }
1315   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatCopy_MPIAIJ"
1321 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1322 {
1323   int        ierr;
1324   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1325   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1326   PetscTruth flg;
1327 
1328   PetscFunctionBegin;
1329   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1330   if (str != SAME_NONZERO_PATTERN || !flg) {
1331     /* because of the column compression in the off-processor part of the matrix a->B,
1332        the number of columns in a->B and b->B may differ, hence we cannot call
1333        MatCopy() directly on the two parts. If need be, we could provide a more
1334        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1335        and then copying the submatrices */
1336     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1337   } else {
1338     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1339     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1346 int MatSetUpPreallocation_MPIAIJ(Mat A)
1347 {
1348   int        ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1356 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1357 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1358 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1359 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1360 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1361 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1362 #endif
1363 
1364 /* -------------------------------------------------------------------*/
1365 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1366        MatGetRow_MPIAIJ,
1367        MatRestoreRow_MPIAIJ,
1368        MatMult_MPIAIJ,
1369        MatMultAdd_MPIAIJ,
1370        MatMultTranspose_MPIAIJ,
1371        MatMultTransposeAdd_MPIAIJ,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        MatRelax_MPIAIJ,
1379        MatTranspose_MPIAIJ,
1380        MatGetInfo_MPIAIJ,
1381        MatEqual_MPIAIJ,
1382        MatGetDiagonal_MPIAIJ,
1383        MatDiagonalScale_MPIAIJ,
1384        MatNorm_MPIAIJ,
1385        MatAssemblyBegin_MPIAIJ,
1386        MatAssemblyEnd_MPIAIJ,
1387        0,
1388        MatSetOption_MPIAIJ,
1389        MatZeroEntries_MPIAIJ,
1390        MatZeroRows_MPIAIJ,
1391 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1392        MatLUFactorSymbolic_MPIAIJ_TFS,
1393 #else
1394        0,
1395 #endif
1396        0,
1397        0,
1398        0,
1399        MatSetUpPreallocation_MPIAIJ,
1400        0,
1401        0,
1402        0,
1403        0,
1404        MatDuplicate_MPIAIJ,
1405        0,
1406        0,
1407        0,
1408        0,
1409        0,
1410        MatGetSubMatrices_MPIAIJ,
1411        MatIncreaseOverlap_MPIAIJ,
1412        MatGetValues_MPIAIJ,
1413        MatCopy_MPIAIJ,
1414        MatPrintHelp_MPIAIJ,
1415        MatScale_MPIAIJ,
1416        0,
1417        0,
1418        0,
1419        MatGetBlockSize_MPIAIJ,
1420        0,
1421        0,
1422        0,
1423        0,
1424        MatFDColoringCreate_MPIAIJ,
1425        0,
1426        MatSetUnfactored_MPIAIJ,
1427        0,
1428        0,
1429        MatGetSubMatrix_MPIAIJ,
1430        MatDestroy_MPIAIJ,
1431        MatView_MPIAIJ,
1432        MatGetPetscMaps_Petsc,
1433        0,
1434        0,
1435        0,
1436        0,
1437        0,
1438        0,
1439        0,
1440        0,
1441        MatSetColoring_MPIAIJ,
1442        MatSetValuesAdic_MPIAIJ,
1443        MatSetValuesAdifor_MPIAIJ
1444 };
1445 
1446 /* ----------------------------------------------------------------------------------------*/
1447 
1448 EXTERN_C_BEGIN
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1451 int MatStoreValues_MPIAIJ(Mat mat)
1452 {
1453   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1454   int        ierr;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1458   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 EXTERN_C_END
1462 
1463 EXTERN_C_BEGIN
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1466 int MatRetrieveValues_MPIAIJ(Mat mat)
1467 {
1468   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1469   int        ierr;
1470 
1471   PetscFunctionBegin;
1472   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1473   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 EXTERN_C_END
1477 
1478 #include "petscpc.h"
1479 EXTERN_C_BEGIN
1480 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1481 EXTERN_C_END
1482 
1483 EXTERN_C_BEGIN
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatCreate_MPIAIJ"
1486 int MatCreate_MPIAIJ(Mat B)
1487 {
1488   Mat_MPIAIJ   *b;
1489   int          ierr,i,size;
1490 
1491   PetscFunctionBegin;
1492   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1493 
1494   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1495   B->data         = (void*)b;
1496   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1497   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1498   B->factor       = 0;
1499   B->assembled    = PETSC_FALSE;
1500   B->mapping      = 0;
1501 
1502   B->insertmode      = NOT_SET_VALUES;
1503   b->size            = size;
1504   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1505 
1506   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1507   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1508 
1509   /* the information in the maps duplicates the information computed below, eventually
1510      we should remove the duplicate information that is not contained in the maps */
1511   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1512   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1513 
1514   /* build local table of row and column ownerships */
1515   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1516   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1517   b->cowners = b->rowners + b->size + 2;
1518   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1519   b->rowners[0] = 0;
1520   for (i=2; i<=b->size; i++) {
1521     b->rowners[i] += b->rowners[i-1];
1522   }
1523   b->rstart = b->rowners[b->rank];
1524   b->rend   = b->rowners[b->rank+1];
1525   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1526   b->cowners[0] = 0;
1527   for (i=2; i<=b->size; i++) {
1528     b->cowners[i] += b->cowners[i-1];
1529   }
1530   b->cstart = b->cowners[b->rank];
1531   b->cend   = b->cowners[b->rank+1];
1532 
1533   /* build cache for off-processor entries formed during matrix assembly */
1534   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1535   b->donotstash  = PETSC_FALSE;
1536   b->colmap      = 0;
1537   b->garray      = 0;
1538   b->roworiented = PETSC_TRUE;
1539 
1540   /* stuff used for matrix vector multiply */
1541   b->lvec      = PETSC_NULL;
1542   b->Mvctx     = PETSC_NULL;
1543 
1544   /* stuff for MatGetRow() */
1545   b->rowindices   = 0;
1546   b->rowvalues    = 0;
1547   b->getrowactive = PETSC_FALSE;
1548 
1549   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1550                                      "MatStoreValues_MPIAIJ",
1551                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1552   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1553                                      "MatRetrieveValues_MPIAIJ",
1554                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1555   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1556                                      "MatGetDiagonalBlock_MPIAIJ",
1557                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1564 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1565 {
1566   Mat        mat;
1567   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1568   int        ierr;
1569 
1570   PetscFunctionBegin;
1571   *newmat       = 0;
1572   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1573   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1574   a    = (Mat_MPIAIJ*)mat->data;
1575   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1576   mat->factor       = matin->factor;
1577   mat->assembled    = PETSC_TRUE;
1578   mat->insertmode   = NOT_SET_VALUES;
1579   mat->preallocated = PETSC_TRUE;
1580 
1581   a->rstart       = oldmat->rstart;
1582   a->rend         = oldmat->rend;
1583   a->cstart       = oldmat->cstart;
1584   a->cend         = oldmat->cend;
1585   a->size         = oldmat->size;
1586   a->rank         = oldmat->rank;
1587   a->donotstash   = oldmat->donotstash;
1588   a->roworiented  = oldmat->roworiented;
1589   a->rowindices   = 0;
1590   a->rowvalues    = 0;
1591   a->getrowactive = PETSC_FALSE;
1592 
1593   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1594   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1595   if (oldmat->colmap) {
1596 #if defined (PETSC_USE_CTABLE)
1597     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1598 #else
1599     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1600     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1601     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1602 #endif
1603   } else a->colmap = 0;
1604   if (oldmat->garray) {
1605     int len;
1606     len  = oldmat->B->n;
1607     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1608     PetscLogObjectMemory(mat,len*sizeof(int));
1609     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1610   } else a->garray = 0;
1611 
1612   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1613   PetscLogObjectParent(mat,a->lvec);
1614   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1615   PetscLogObjectParent(mat,a->Mvctx);
1616   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1617   PetscLogObjectParent(mat,a->A);
1618   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1619   PetscLogObjectParent(mat,a->B);
1620   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1621   *newmat = mat;
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #include "petscsys.h"
1626 
1627 EXTERN_C_BEGIN
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatLoad_MPIAIJ"
1630 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1631 {
1632   Mat          A;
1633   PetscScalar  *vals,*svals;
1634   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1635   MPI_Status   status;
1636   int          i,nz,ierr,j,rstart,rend,fd;
1637   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1638   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1639   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1640 
1641   PetscFunctionBegin;
1642   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1643   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1644   if (!rank) {
1645     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1646     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1647     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1648     if (header[3] < 0) {
1649       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1650     }
1651   }
1652 
1653   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1654   M = header[1]; N = header[2];
1655   /* determine ownership of all rows */
1656   m = M/size + ((M % size) > rank);
1657   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1658   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1659   rowners[0] = 0;
1660   for (i=2; i<=size; i++) {
1661     rowners[i] += rowners[i-1];
1662   }
1663   rstart = rowners[rank];
1664   rend   = rowners[rank+1];
1665 
1666   /* distribute row lengths to all processors */
1667   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1668   offlens = ourlens + (rend-rstart);
1669   if (!rank) {
1670     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1671     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1672     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1673     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1674     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1675     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1676   } else {
1677     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1678   }
1679 
1680   if (!rank) {
1681     /* calculate the number of nonzeros on each processor */
1682     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1683     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1684     for (i=0; i<size; i++) {
1685       for (j=rowners[i]; j< rowners[i+1]; j++) {
1686         procsnz[i] += rowlengths[j];
1687       }
1688     }
1689     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1690 
1691     /* determine max buffer needed and allocate it */
1692     maxnz = 0;
1693     for (i=0; i<size; i++) {
1694       maxnz = PetscMax(maxnz,procsnz[i]);
1695     }
1696     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1697 
1698     /* read in my part of the matrix column indices  */
1699     nz   = procsnz[0];
1700     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1701     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1702 
1703     /* read in everyone else's part of the matrix and ship it off */
1704     for (i=1; i<size; i++) {
1705       nz   = procsnz[i];
1706       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1707       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1708     }
1709     ierr = PetscFree(cols);CHKERRQ(ierr);
1710   } else {
1711     /* determine buffer space needed for message */
1712     nz = 0;
1713     for (i=0; i<m; i++) {
1714       nz += ourlens[i];
1715     }
1716     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1717 
1718     /* receive message of column indices */
1719     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1720     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1721     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1722   }
1723 
1724   /* determine column ownership if matrix is not square */
1725   if (N != M) {
1726     n      = N/size + ((N % size) > rank);
1727     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1728     cstart = cend - n;
1729   } else {
1730     cstart = rstart;
1731     cend   = rend;
1732     n      = cend - cstart;
1733   }
1734 
1735   /* loop over local rows, determining number of off diagonal entries */
1736   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1737   jj = 0;
1738   for (i=0; i<m; i++) {
1739     for (j=0; j<ourlens[i]; j++) {
1740       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1741       jj++;
1742     }
1743   }
1744 
1745   /* create our matrix */
1746   for (i=0; i<m; i++) {
1747     ourlens[i] -= offlens[i];
1748   }
1749   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1750   A = *newmat;
1751   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1752   for (i=0; i<m; i++) {
1753     ourlens[i] += offlens[i];
1754   }
1755 
1756   if (!rank) {
1757     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1758 
1759     /* read in my part of the matrix numerical values  */
1760     nz   = procsnz[0];
1761     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1762 
1763     /* insert into matrix */
1764     jj      = rstart;
1765     smycols = mycols;
1766     svals   = vals;
1767     for (i=0; i<m; i++) {
1768       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1769       smycols += ourlens[i];
1770       svals   += ourlens[i];
1771       jj++;
1772     }
1773 
1774     /* read in other processors and ship out */
1775     for (i=1; i<size; i++) {
1776       nz   = procsnz[i];
1777       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1778       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1779     }
1780     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1781   } else {
1782     /* receive numeric values */
1783     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1784 
1785     /* receive message of values */
1786     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1787     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1788     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1789 
1790     /* insert into matrix */
1791     jj      = rstart;
1792     smycols = mycols;
1793     svals   = vals;
1794     for (i=0; i<m; i++) {
1795       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1796       smycols += ourlens[i];
1797       svals   += ourlens[i];
1798       jj++;
1799     }
1800   }
1801   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1802   ierr = PetscFree(vals);CHKERRQ(ierr);
1803   ierr = PetscFree(mycols);CHKERRQ(ierr);
1804   ierr = PetscFree(rowners);CHKERRQ(ierr);
1805 
1806   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1807   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 EXTERN_C_END
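
/*
   Usage sketch (illustrative only): MatLoad_MPIAIJ() above is normally reached by
   calling MatLoad() on a binary viewer. The file name and the PETSC_BINARY_RDONLY
   mode flag below are assumptions made for the example, not taken from this file.

       Mat         A;
       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/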
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1814 /*
1815     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1816   copy on each process, and then the end result obtained by concatenating the
1817   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1818 */
1819 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1820 {
1821   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1822   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1823   Mat          *local,M,Mreuse;
1824   PetscScalar  *vwork,*aa;
1825   MPI_Comm     comm = mat->comm;
1826   Mat_SeqAIJ   *aij;
1827 
1828 
1829   PetscFunctionBegin;
1830   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1831   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1832 
1833   if (call ==  MAT_REUSE_MATRIX) {
1834     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1835     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1836     local = &Mreuse;
1837     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1838   } else {
1839     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1840     Mreuse = *local;
1841     ierr   = PetscFree(local);CHKERRQ(ierr);
1842   }
1843 
1844   /*
1845       m - number of local rows
1846       n - number of columns (same on all processors)
1847       rstart - first row in new global matrix generated
1848   */
1849   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1850   if (call == MAT_INITIAL_MATRIX) {
1851     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1852     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1853     ii  = aij->i;
1854     jj  = aij->j;
1855 
1856     /*
1857         Determine the number of non-zeros in the diagonal and off-diagonal
1858         portions of the matrix in order to do correct preallocation
1859     */
1860 
1861     /* first get start and end of "diagonal" columns */
1862     if (csize == PETSC_DECIDE) {
1863       nlocal = n/size + ((n % size) > rank);
1864     } else {
1865       nlocal = csize;
1866     }
1867     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1868     rstart = rend - nlocal;
1869     if (rank == size - 1 && rend != n) {
1870       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1871     }
1872 
1873     /* next, compute all the lengths */
1874     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1875     olens = dlens + m;
1876     for (i=0; i<m; i++) {
1877       jend = ii[i+1] - ii[i];
1878       olen = 0;
1879       dlen = 0;
1880       for (j=0; j<jend; j++) {
1881         if (*jj < rstart || *jj >= rend) olen++;
1882         else dlen++;
1883         jj++;
1884       }
1885       olens[i] = olen;
1886       dlens[i] = dlen;
1887     }
1888     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1889     ierr = PetscFree(dlens);CHKERRQ(ierr);
1890   } else {
1891     int ml,nl;
1892 
1893     M = *newmat;
1894     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1895     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1896     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1897     /*
1898          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1899        rather than the slower MatSetValues().
1900     */
1901     M->was_assembled = PETSC_TRUE;
1902     M->assembled     = PETSC_FALSE;
1903   }
1904   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1905   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1906   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1907   ii  = aij->i;
1908   jj  = aij->j;
1909   aa  = aij->a;
1910   for (i=0; i<m; i++) {
1911     row   = rstart + i;
1912     nz    = ii[i+1] - ii[i];
1913     cwork = jj;     jj += nz;
1914     vwork = aa;     aa += nz;
1915     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1916   }
1917 
1918   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1919   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1920   *newmat = M;
1921 
1922   /* save submatrix used in processor for next request */
1923   if (call ==  MAT_INITIAL_MATRIX) {
1924     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1925     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1926   }
1927 
1928   PetscFunctionReturn(0);
1929 }
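
/*
   Usage sketch (illustrative only): the routine above is reached through the public
   MatGetSubMatrix() interface, where each process supplies index sets of the global
   rows and columns it wants in the new parallel submatrix. The stride parameters
   (nlocalrows, firstrow, ...) are hypothetical placeholders.

       IS  isrow,iscol;
       Mat sub;
       ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... later, reusing the same layout ...
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/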
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1933 /*@C
1934    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1935    (the default parallel PETSc format).  For good matrix assembly performance
1936    the user should preallocate the matrix storage by setting the parameters
1937    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1938    performance can be increased by more than a factor of 50.
1939 
1940    Collective on MPI_Comm
1941 
1942    Input Parameters:
1943 +  A - the matrix
1944 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1945            (same value is used for all local rows)
1946 .  d_nnz - array containing the number of nonzeros in the various rows of the
1947            DIAGONAL portion of the local submatrix (possibly different for each row)
1948            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1949            The size of this array is equal to the number of local rows, i.e., 'm'.
1950            You must leave room for the diagonal entry even if it is zero.
1951 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1952            submatrix (same value is used for all local rows).
1953 -  o_nnz - array containing the number of nonzeros in the various rows of the
1954            OFF-DIAGONAL portion of the local submatrix (possibly different for
1955            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1956            structure. The size of this array is equal to the number
1957            of local rows, i.e., 'm'.
1958 
1959    The AIJ format (also called the Yale sparse matrix format or
1960    compressed row storage), is fully compatible with standard Fortran 77
1961    storage.  That is, the stored row and column indices can begin at
1962    either one (as in Fortran) or zero.  See the users manual for details.
1963 
1964    The user MUST specify either the local or global matrix dimensions
1965    (possibly both).
1966 
1967    The parallel matrix is partitioned such that the first m0 rows belong to
1968    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1969    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
1970 
1971    The DIAGONAL portion of the local submatrix of a processor can be defined
1972    as the submatrix obtained by extracting the part corresponding
1973    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1974    first row that belongs to the processor, and r2 is the last row belonging
1975    to this processor. This is a square mxm matrix. The remaining portion
1976    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1977 
1978    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1979 
1980    By default, this format uses inodes (identical nodes) when possible.
1981    We search for consecutive rows with the same nonzero structure, thereby
1982    reusing matrix information to achieve increased efficiency.
1983 
1984    Options Database Keys:
1985 +  -mat_aij_no_inode  - Do not use inodes
1986 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1987 -  -mat_aij_oneindex - Internally use indexing starting at 1
1988         rather than 0.  Note that when calling MatSetValues(),
1989         the user still MUST index entries starting at 0!
1990 
1991    Example usage:
1992 
1993    Consider the following 8x8 matrix with 34 non-zero values, that is
1994    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1995    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1996    as follows:
1997 
1998 .vb
1999             1  2  0  |  0  3  0  |  0  4
2000     Proc0   0  5  6  |  7  0  0  |  8  0
2001             9  0 10  | 11  0  0  | 12  0
2002     -------------------------------------
2003            13  0 14  | 15 16 17  |  0  0
2004     Proc1   0 18  0  | 19 20 21  |  0  0
2005             0  0  0  | 22 23  0  | 24  0
2006     -------------------------------------
2007     Proc2  25 26 27  |  0  0 28  | 29  0
2008            30  0  0  | 31 32 33  |  0 34
2009 .ve
2010 
2011    This can be represented as a collection of submatrices as:
2012 
2013 .vb
2014       A B C
2015       D E F
2016       G H I
2017 .ve
2018 
2019    Where the submatrices A,B,C are owned by proc0, D,E,F are
2020    owned by proc1, G,H,I are owned by proc2.
2021 
2022    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2023    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2024    The 'M','N' parameters are 8,8, and have the same values on all procs.
2025 
2026    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2027    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2028    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2029    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2030    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2031    matrix, and [DF] as another SeqAIJ matrix.
2032 
2033    When d_nz, o_nz parameters are specified, d_nz storage elements are
2034    allocated for every row of the local diagonal submatrix, and o_nz
2035    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2036    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2037    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2038    In this case, the values of d_nz,o_nz are:
2039 .vb
2040      proc0 : d_nz = 2, o_nz = 2
2041      proc1 : d_nz = 3, o_nz = 2
2042      proc2 : d_nz = 1, o_nz = 4
2043 .ve
2044    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2045    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2046    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2047    34 values.
2048 
2049    When d_nnz, o_nnz parameters are specified, the storage is specified
2050    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2051    In the above case the values for d_nnz,o_nnz are:
2052 .vb
2053      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2054      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2055      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2056 .ve
2057    Here the space allocated is the sum of all of the above values, i.e., 34, and
2058    hence the preallocation is exact (a brief usage sketch follows this routine).
2059 
2060    Level: intermediate
2061 
2062 .keywords: matrix, aij, compressed row, sparse, parallel
2063 
2064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2065 @*/
2066 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2067 {
2068   Mat_MPIAIJ   *b;
2069   int          ierr,i;
2070   PetscTruth   flg2;
2071 
2072   PetscFunctionBegin;
2073   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2074   if (!flg2) PetscFunctionReturn(0);
2075   B->preallocated = PETSC_TRUE;
2076   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2077   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2078   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2079   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2080   if (d_nnz) {
2081     for (i=0; i<B->m; i++) {
2082       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2083     }
2084   }
2085   if (o_nnz) {
2086     for (i=0; i<B->m; i++) {
2087       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2088     }
2089   }
2090   b = (Mat_MPIAIJ*)B->data;
2091 
2092   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2093   PetscLogObjectParent(B,b->A);
2094   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2095   PetscLogObjectParent(B,b->B);
2096 
2097   PetscFunctionReturn(0);
2098 }
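
/*
   Usage sketch (illustrative only), using the d_nnz/o_nnz values that the manual page
   above works out for proc0 of the 8x8 example; the communicator and sizes are what
   proc0 would pass, everything else is hypothetical.

       Mat A;
       int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
       ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/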
2099 
2100 #undef __FUNCT__
2101 #define __FUNCT__ "MatCreateMPIAIJ"
2102 /*@C
2103    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2104    (the default parallel PETSc format).  For good matrix assembly performance
2105    the user should preallocate the matrix storage by setting the parameters
2106    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2107    performance can be increased by more than a factor of 50.
2108 
2109    Collective on MPI_Comm
2110 
2111    Input Parameters:
2112 +  comm - MPI communicator
2113 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2114            This value should be the same as the local size used in creating the
2115            y vector for the matrix-vector product y = Ax.
2116 .  n - This value should be the same as the local size used in creating the
2117        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2118        calculated if N is given). For square matrices n is almost always m.
2119 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2120 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2121 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2122            (same value is used for all local rows)
2123 .  d_nnz - array containing the number of nonzeros in the various rows of the
2124            DIAGONAL portion of the local submatrix (possibly different for each row)
2125            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2126            The size of this array is equal to the number of local rows, i.e., 'm'.
2127            You must leave room for the diagonal entry even if it is zero.
2128 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2129            submatrix (same value is used for all local rows).
2130 -  o_nnz - array containing the number of nonzeros in the various rows of the
2131            OFF-DIAGONAL portion of the local submatrix (possibly different for
2132            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2133            structure. The size of this array is equal to the number
2134            of local rows, i.e., 'm'.
2135 
2136    Output Parameter:
2137 .  A - the matrix
2138 
2139    Notes:
2140    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2141    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2142    storage requirements for this matrix.
2143 
2144    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2145    processor then it must be used on all processors that share the object for
2146    that argument.
2147 
2148    The AIJ format (also called the Yale sparse matrix format or
2149    compressed row storage), is fully compatible with standard Fortran 77
2150    storage.  That is, the stored row and column indices can begin at
2151    either one (as in Fortran) or zero.  See the users manual for details.
2152 
2153    The user MUST specify either the local or global matrix dimensions
2154    (possibly both).
2155 
2156    The parallel matrix is partitioned such that the first m0 rows belong to
2157    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2158    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2159 
2160    The DIAGONAL portion of the local submatrix of a processor can be defined
2161    as the submatrix obtained by extracting the part corresponding
2162    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2163    first row that belongs to the processor, and r2 is the last row belonging
2164    to this processor. This is a square mxm matrix. The remaining portion
2165    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2166 
2167    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2168 
2169    By default, this format uses inodes (identical nodes) when possible.
2170    We search for consecutive rows with the same nonzero structure, thereby
2171    reusing matrix information to achieve increased efficiency.
2172 
2173    Options Database Keys:
2174 +  -mat_aij_no_inode  - Do not use inodes
2175 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2176 -  -mat_aij_oneindex - Internally use indexing starting at 1
2177         rather than 0.  Note that when calling MatSetValues(),
2178         the user still MUST index entries starting at 0!
2179 
2180 
2181    Example usage:
2182 
2183    Consider the following 8x8 matrix with 34 non-zero values, that is
2184    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2185    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2186    as follows:
2187 
2188 .vb
2189             1  2  0  |  0  3  0  |  0  4
2190     Proc0   0  5  6  |  7  0  0  |  8  0
2191             9  0 10  | 11  0  0  | 12  0
2192     -------------------------------------
2193            13  0 14  | 15 16 17  |  0  0
2194     Proc1   0 18  0  | 19 20 21  |  0  0
2195             0  0  0  | 22 23  0  | 24  0
2196     -------------------------------------
2197     Proc2  25 26 27  |  0  0 28  | 29  0
2198            30  0  0  | 31 32 33  |  0 34
2199 .ve
2200 
2201    This can be represented as a collection of submatrices as:
2202 
2203 .vb
2204       A B C
2205       D E F
2206       G H I
2207 .ve
2208 
2209    Where the submatrices A,B,C are owned by proc0, D,E,F are
2210    owned by proc1, G,H,I are owned by proc2.
2211 
2212    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2213    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2214    The 'M','N' parameters are 8,8, and have the same values on all procs.
2215 
2216    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2217    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2218    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2219    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2220    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2221    matrix, and [DF] as another SeqAIJ matrix.
2222 
2223    When d_nz, o_nz parameters are specified, d_nz storage elements are
2224    allocated for every row of the local diagonal submatrix, and o_nz
2225    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2226    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2227    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2228    In this case, the values of d_nz,o_nz are:
2229 .vb
2230      proc0 : d_nz = 2, o_nz = 2
2231      proc1 : d_nz = 3, o_nz = 2
2232      proc2 : d_nz = 1, o_nz = 4
2233 .ve
2234    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2235    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2236    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2237    34 values.
2238 
2239    When d_nnz, o_nnz parameters are specified, the storage is specified
2240    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2241    In the above case the values for d_nnz,o_nnz are:
2242 .vb
2243      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2244      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2245      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2246 .ve
2247    Here the space allocated is the sum of all of the above values, i.e., 34, and
2248    hence the preallocation is exact (a brief usage sketch follows this routine).
2249 
2250    Level: intermediate
2251 
2252 .keywords: matrix, aij, compressed row, sparse, parallel
2253 
2254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2255 @*/
2256 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2257 {
2258   int ierr,size;
2259 
2260   PetscFunctionBegin;
2261   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2262   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2263   if (size > 1) {
2264     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2265     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2266   } else {
2267     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2268     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2269   }
2270   PetscFunctionReturn(0);
2271 }
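
/*
   Usage sketch (illustrative only), mirroring the manual page above as it would look
   on proc1 of the 8x8 example (3 local rows and columns, global size 8x8); the
   d_nnz/o_nnz values are the ones derived there, everything else is hypothetical.

       Mat A;
       int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
       ... calls to MatSetValues() to fill the owned rows ...
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/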
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2275 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2276 {
2277   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2278   PetscFunctionBegin;
2279   *Ad     = a->A;
2280   *Ao     = a->B;
2281   *colmap = a->garray;
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2287 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2288 {
2289   int        ierr;
2290   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2291 
2292   PetscFunctionBegin;
2293   if (coloring->ctype == IS_COLORING_LOCAL) {
2294     int        *allcolors,*colors,i;
2295     ISColoring ocoloring;
2296 
2297     /* set coloring for diagonal portion */
2298     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2299 
2300     /* set coloring for off-diagonal portion */
2301     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2302     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2303     for (i=0; i<a->B->n; i++) {
2304       colors[i] = allcolors[a->garray[i]];
2305     }
2306     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2307     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2308     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2309     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2310   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2311     int        *colors,i,*larray;
2312     ISColoring ocoloring;
2313 
2314     /* set coloring for diagonal portion */
2315     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2316     for (i=0; i<a->A->n; i++) {
2317       larray[i] = i + a->cstart;
2318     }
2319     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2320     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2321     for (i=0; i<a->A->n; i++) {
2322       colors[i] = coloring->colors[larray[i]];
2323     }
2324     ierr = PetscFree(larray);CHKERRQ(ierr);
2325     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2326     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2327     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2328 
2329     /* set coloring for off-diagonal portion */
2330     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2331     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2332     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2333     for (i=0; i<a->B->n; i++) {
2334       colors[i] = coloring->colors[larray[i]];
2335     }
2336     ierr = PetscFree(larray);CHKERRQ(ierr);
2337     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2338     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2339     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2340   } else {
2341     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2342   }
2343 
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2349 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2350 {
2351   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2352   int        ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2356   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2362 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2363 {
2364   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2365   int        ierr;
2366 
2367   PetscFunctionBegin;
2368   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2369   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372