xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c14dc6b6670b013458f80652c6b8e042caee0f12)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order-N integer array) but is fast to access.
20 */
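/*
   Illustrative sketch (not part of the original source; the garray values are
   hypothetical): with the array-based colmap below, if the off-diagonal block
   has garray = {4,9,57} then colmap[4] = 1, colmap[9] = 2, colmap[57] = 3 and
   every other entry is 0, so colmap[global]-1 recovers the local column number
   in B and a result of -1 means "column not present on this processor".
*/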
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
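/*
   The two macros below are inlined versions of MatSetValues_SeqAIJ() for the
   local diagonal (A) and off-diagonal (B) blocks: a short binary search followed
   by a linear scan locates the column within the row; if the entry is new and the
   row has no free slot, the i/j/a arrays are reallocated with CHUNKSIZE extra
   entries for that row and later entries in the row are shifted up.
*/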
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some Variables required in the macro */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
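/*
   Illustrative usage sketch (not part of the original source; comm, M, N, the
   preallocation counts and the index/value arrays are hypothetical):

     Mat         A;
     int         row,cols[3];
     PetscScalar vals[3];
     ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     row = ...; cols[0] = ...; vals[0] = ...;                  (global indices)
     ierr = MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Rows owned by this processor go straight into aij->A or aij->B in the routine
   above; rows owned elsewhere are stashed and communicated during assembly.
*/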
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
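/*
   Assembly works in two stages: MatAssemblyBegin scatters the stashed
   off-processor entries (collected by MatSetValues_MPIAIJ above) to their
   owners, and MatAssemblyEnd receives them, applies them row by row with
   MatSetValues_MPIAIJ, and then assembles the local A and B blocks.
*/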
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled; if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if the nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled, thus we can skip this stuff
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry, which
533      would allocate new memory, causing lots of mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
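
/*
   Illustrative usage sketch (not part of the original source; the row list is
   hypothetical), using the MatZeroRows(Mat,IS,PetscScalar*) calling sequence
   assumed above:

     IS          rows;
     int         r[2];
     PetscScalar one = 1.0;
     r[0] = ...; r[1] = ...;                           (global row numbers)
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,r,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);    (zero rows, keep unit diagonal)
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/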
570 
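/*
   The multiply routines below compute y = A_d*x_local + B_o*x_ghost: the scatter
   of the needed off-processor entries of x into a->lvec is started first, the
   diagonal-block product is computed while the messages are in flight, and the
   off-diagonal product is added once the scatter completes.
*/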
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for this implementation */
621   /* but may not always be true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in zz until the next line, which is true for this implementation */
642   /* but may not always be true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     PetscScalar *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Everyone has to call to draw the matrix since the graphics waits are
798        synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 
832 
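/*
   The relaxation below only supports the SOR_LOCAL_* variants: on each outer
   iteration the current x is scattered into mat->lvec, the ghost contribution is
   moved to the right-hand side as bb1 = bb - B*x_ghost (by scaling lvec by -1 and
   using the multadd of B), and a sequential sweep is applied to the diagonal block.
*/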
833 #undef __FUNCT__
834 #define __FUNCT__ "MatRelax_MPIAIJ"
835 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
836 {
837   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
838   int          ierr;
839   Vec          bb1;
840   PetscReal    mone=-1.0;
841 
842   PetscFunctionBegin;
843 
844   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
845 
846   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
847     if (flag & SOR_ZERO_INITIAL_GUESS) {
848       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
849       its--;
850     }
851 
852     /* ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); */
853     while (its--) {
854       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
855       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
856 
857       /* update rhs: bb1 = bb - B*x */
858       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
859       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
860 
861       /* local sweep */
862       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
863       CHKERRQ(ierr);
864     }
865   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
866     if (flag & SOR_ZERO_INITIAL_GUESS) {
867       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
868       its--;
869     }
870     while (its--) {
871       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
872       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
873 
874       /* update rhs: bb1 = bb - B*x */
875       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
876       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
877 
878       /* local sweep */
879       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
880       CHKERRQ(ierr);
881     }
882   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
883     if (flag & SOR_ZERO_INITIAL_GUESS) {
884       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
885       its--;
886     }
887     while (its--) {
888       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
889       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
890 
891       /* update rhs: bb1 = bb - B*x */
892       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
893       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
894 
895       /* local sweep */
896       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
897       CHKERRQ(ierr);
898     }
899   } else {
900     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
901   }
902 
903   ierr = VecDestroy(bb1);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "MatGetInfo_MPIAIJ"
909 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
910 {
911   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
912   Mat        A = mat->A,B = mat->B;
913   int        ierr;
914   PetscReal  isend[5],irecv[5];
915 
916   PetscFunctionBegin;
917   info->block_size     = 1.0;
918   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
919   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
920   isend[3] = info->memory;  isend[4] = info->mallocs;
921   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
922   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
923   isend[3] += info->memory;  isend[4] += info->mallocs;
924   if (flag == MAT_LOCAL) {
925     info->nz_used      = isend[0];
926     info->nz_allocated = isend[1];
927     info->nz_unneeded  = isend[2];
928     info->memory       = isend[3];
929     info->mallocs      = isend[4];
930   } else if (flag == MAT_GLOBAL_MAX) {
931     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
932     info->nz_used      = irecv[0];
933     info->nz_allocated = irecv[1];
934     info->nz_unneeded  = irecv[2];
935     info->memory       = irecv[3];
936     info->mallocs      = irecv[4];
937   } else if (flag == MAT_GLOBAL_SUM) {
938     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
939     info->nz_used      = irecv[0];
940     info->nz_allocated = irecv[1];
941     info->nz_unneeded  = irecv[2];
942     info->memory       = irecv[3];
943     info->mallocs      = irecv[4];
944   }
945   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
946   info->fill_ratio_needed = 0;
947   info->factor_mallocs    = 0;
948   info->rows_global       = (double)matin->M;
949   info->columns_global    = (double)matin->N;
950   info->rows_local        = (double)matin->m;
951   info->columns_local     = (double)matin->N;
952 
953   PetscFunctionReturn(0);
954 }
955 
956 #undef __FUNCT__
957 #define __FUNCT__ "MatSetOption_MPIAIJ"
958 int MatSetOption_MPIAIJ(Mat A,MatOption op)
959 {
960   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
961   int        ierr;
962 
963   PetscFunctionBegin;
964   switch (op) {
965   case MAT_NO_NEW_NONZERO_LOCATIONS:
966   case MAT_YES_NEW_NONZERO_LOCATIONS:
967   case MAT_COLUMNS_UNSORTED:
968   case MAT_COLUMNS_SORTED:
969   case MAT_NEW_NONZERO_ALLOCATION_ERR:
970   case MAT_KEEP_ZEROED_ROWS:
971   case MAT_NEW_NONZERO_LOCATION_ERR:
972   case MAT_USE_INODES:
973   case MAT_DO_NOT_USE_INODES:
974   case MAT_IGNORE_ZERO_ENTRIES:
975     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
976     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
977     break;
978   case MAT_ROW_ORIENTED:
979     a->roworiented = PETSC_TRUE;
980     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
981     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
982     break;
983   case MAT_ROWS_SORTED:
984   case MAT_ROWS_UNSORTED:
985   case MAT_YES_NEW_DIAGONALS:
986   case MAT_USE_SINGLE_PRECISION_SOLVES:
987     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
988     break;
989   case MAT_COLUMN_ORIENTED:
990     a->roworiented = PETSC_FALSE;
991     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
992     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
993     break;
994   case MAT_IGNORE_OFF_PROC_ENTRIES:
995     a->donotstash = PETSC_TRUE;
996     break;
997   case MAT_NO_NEW_DIAGONALS:
998     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
999     break;
1000   default:
1001     SETERRQ(PETSC_ERR_SUP,"unknown option");
1002     break;
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
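/*
   MatGetRow below returns one locally owned row as the merge of the diagonal (A)
   and off-diagonal (B) pieces in increasing global column order; garray translates
   B's compressed local column numbers back to global indices, and a work array
   large enough for the longest local row is allocated on first use.
*/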
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatGetRow_MPIAIJ"
1009 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1010 {
1011   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1012   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1013   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1014   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1015   int          *cmap,*idx_p;
1016 
1017   PetscFunctionBegin;
1018   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1019   mat->getrowactive = PETSC_TRUE;
1020 
1021   if (!mat->rowvalues && (idx || v)) {
1022     /*
1023         allocate enough space to hold information from the longest row.
1024     */
1025     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1026     int     max = 1,tmp;
1027     for (i=0; i<matin->m; i++) {
1028       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1029       if (max < tmp) { max = tmp; }
1030     }
1031     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1032     mat->rowindices = (int*)(mat->rowvalues + max);
1033   }
1034 
1035   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1036   lrow = row - rstart;
1037 
1038   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1039   if (!v)   {pvA = 0; pvB = 0;}
1040   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1041   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1042   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1043   nztot = nzA + nzB;
1044 
1045   cmap  = mat->garray;
1046   if (v  || idx) {
1047     if (nztot) {
1048       /* Sort by increasing column numbers, assuming A and B already sorted */
1049       int imark = -1;
1050       if (v) {
1051         *v = v_p = mat->rowvalues;
1052         for (i=0; i<nzB; i++) {
1053           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1054           else break;
1055         }
1056         imark = i;
1057         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1058         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1059       }
1060       if (idx) {
1061         *idx = idx_p = mat->rowindices;
1062         if (imark > -1) {
1063           for (i=0; i<imark; i++) {
1064             idx_p[i] = cmap[cworkB[i]];
1065           }
1066         } else {
1067           for (i=0; i<nzB; i++) {
1068             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1069             else break;
1070           }
1071           imark = i;
1072         }
1073         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1074         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1075       }
1076     } else {
1077       if (idx) *idx = 0;
1078       if (v)   *v   = 0;
1079     }
1080   }
1081   *nz = nztot;
1082   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1083   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1089 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1090 {
1091   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1092 
1093   PetscFunctionBegin;
1094   if (aij->getrowactive == PETSC_FALSE) {
1095     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1096   }
1097   aij->getrowactive = PETSC_FALSE;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatNorm_MPIAIJ"
1103 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1104 {
1105   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1106   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1107   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1108   PetscReal    sum = 0.0;
1109   PetscScalar  *v;
1110 
1111   PetscFunctionBegin;
1112   if (aij->size == 1) {
1113     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1114   } else {
1115     if (type == NORM_FROBENIUS) {
1116       v = amat->a;
1117       for (i=0; i<amat->nz; i++) {
1118 #if defined(PETSC_USE_COMPLEX)
1119         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1120 #else
1121         sum += (*v)*(*v); v++;
1122 #endif
1123       }
1124       v = bmat->a;
1125       for (i=0; i<bmat->nz; i++) {
1126 #if defined(PETSC_USE_COMPLEX)
1127         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1128 #else
1129         sum += (*v)*(*v); v++;
1130 #endif
1131       }
1132       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1133       *norm = sqrt(*norm);
1134     } else if (type == NORM_1) { /* max column norm */
1135       PetscReal *tmp,*tmp2;
1136       int    *jj,*garray = aij->garray;
1137       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1138       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1139       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1140       *norm = 0.0;
1141       v = amat->a; jj = amat->j;
1142       for (j=0; j<amat->nz; j++) {
1143         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1144       }
1145       v = bmat->a; jj = bmat->j;
1146       for (j=0; j<bmat->nz; j++) {
1147         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1148       }
1149       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1150       for (j=0; j<mat->N; j++) {
1151         if (tmp2[j] > *norm) *norm = tmp2[j];
1152       }
1153       ierr = PetscFree(tmp);CHKERRQ(ierr);
1154       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1155     } else if (type == NORM_INFINITY) { /* max row norm */
1156       PetscReal ntemp = 0.0;
1157       for (j=0; j<aij->A->m; j++) {
1158         v = amat->a + amat->i[j] + shift;
1159         sum = 0.0;
1160         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1161           sum += PetscAbsScalar(*v); v++;
1162         }
1163         v = bmat->a + bmat->i[j] + shift;
1164         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1165           sum += PetscAbsScalar(*v); v++;
1166         }
1167         if (sum > ntemp) ntemp = sum;
1168       }
1169       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1170     } else {
1171       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1172     }
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
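/*
   The transpose below is built by creating a new n x m (globally N x M) MPIAIJ
   matrix and re-inserting every locally stored row with the row and column index
   arguments of MatSetValues() swapped, after shifting the local column numbers
   back to global numbering.
*/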
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatTranspose_MPIAIJ"
1179 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1180 {
1181   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1182   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1183   int          ierr,shift = Aloc->indexshift;
1184   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1185   Mat          B;
1186   PetscScalar  *array;
1187 
1188   PetscFunctionBegin;
1189   if (!matout && M != N) {
1190     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1191   }
1192 
1193   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1194 
1195   /* copy over the A part */
1196   Aloc = (Mat_SeqAIJ*)a->A->data;
1197   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1198   row = a->rstart;
1199   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1200   for (i=0; i<m; i++) {
1201     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1202     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1203   }
1204   aj = Aloc->j;
1205   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1206 
1207   /* copy over the B part */
1208   Aloc = (Mat_SeqAIJ*)a->B->data;
1209   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1210   row  = a->rstart;
1211   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1212   ct   = cols;
1213   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1214   for (i=0; i<m; i++) {
1215     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1216     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1217   }
1218   ierr = PetscFree(ct);CHKERRQ(ierr);
1219   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1220   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1221   if (matout) {
1222     *matout = B;
1223   } else {
1224     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1231 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1232 {
1233   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1234   Mat        a = aij->A,b = aij->B;
1235   int        ierr,s1,s2,s3;
1236 
1237   PetscFunctionBegin;
1238   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1239   if (rr) {
1240     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1241     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1242     /* Overlap communication with computation. */
1243     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1244   }
1245   if (ll) {
1246     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1247     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1248     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1249   }
1250   /* scale  the diagonal block */
1251   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1252 
1253   if (rr) {
1254     /* Do a scatter end and then right scale the off-diagonal block */
1255     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1256     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1257   }
1258 
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1265 int MatPrintHelp_MPIAIJ(Mat A)
1266 {
1267   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1268   int        ierr;
1269 
1270   PetscFunctionBegin;
1271   if (!a->rank) {
1272     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1279 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1280 {
1281   PetscFunctionBegin;
1282   *bs = 1;
1283   PetscFunctionReturn(0);
1284 }
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1287 int MatSetUnfactored_MPIAIJ(Mat A)
1288 {
1289   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1290   int        ierr;
1291 
1292   PetscFunctionBegin;
1293   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatEqual_MPIAIJ"
1299 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1300 {
1301   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1302   Mat        a,b,c,d;
1303   PetscTruth flg;
1304   int        ierr;
1305 
1306   PetscFunctionBegin;
1307   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1308   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1309   a = matA->A; b = matA->B;
1310   c = matB->A; d = matB->B;
1311 
1312   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1313   if (flg == PETSC_TRUE) {
1314     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1315   }
1316   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatCopy_MPIAIJ"
1322 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1323 {
1324   int        ierr;
1325   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1326   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1327   PetscTruth flg;
1328 
1329   PetscFunctionBegin;
1330   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1331   if (str != SAME_NONZERO_PATTERN || !flg) {
1332     /* because of the column compression in the off-processor part of the matrix a->B,
1333        the number of columns in a->B and b->B may be different, hence we cannot call
1334        MatCopy() directly on the two parts. If need be, we can provide a more
1335        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1336        then copying the submatrices */
1337     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1338   } else {
1339     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1340     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1341   }
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1347 int MatSetUpPreallocation_MPIAIJ(Mat A)
1348 {
1349   int        ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1357 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1358 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1359 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1360 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1361 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1362 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1363 #endif
1364 
1365 /* -------------------------------------------------------------------*/
1366 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1367        MatGetRow_MPIAIJ,
1368        MatRestoreRow_MPIAIJ,
1369        MatMult_MPIAIJ,
1370        MatMultAdd_MPIAIJ,
1371        MatMultTranspose_MPIAIJ,
1372        MatMultTransposeAdd_MPIAIJ,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379        MatRelax_MPIAIJ,
1380        MatTranspose_MPIAIJ,
1381        MatGetInfo_MPIAIJ,
1382        MatEqual_MPIAIJ,
1383        MatGetDiagonal_MPIAIJ,
1384        MatDiagonalScale_MPIAIJ,
1385        MatNorm_MPIAIJ,
1386        MatAssemblyBegin_MPIAIJ,
1387        MatAssemblyEnd_MPIAIJ,
1388        0,
1389        MatSetOption_MPIAIJ,
1390        MatZeroEntries_MPIAIJ,
1391        MatZeroRows_MPIAIJ,
1392 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1393        MatLUFactorSymbolic_MPIAIJ_TFS,
1394 #else
1395        0,
1396 #endif
1397        0,
1398        0,
1399        0,
1400        MatSetUpPreallocation_MPIAIJ,
1401        0,
1402        0,
1403        0,
1404        0,
1405        MatDuplicate_MPIAIJ,
1406        0,
1407        0,
1408        0,
1409        0,
1410        0,
1411        MatGetSubMatrices_MPIAIJ,
1412        MatIncreaseOverlap_MPIAIJ,
1413        MatGetValues_MPIAIJ,
1414        MatCopy_MPIAIJ,
1415        MatPrintHelp_MPIAIJ,
1416        MatScale_MPIAIJ,
1417        0,
1418        0,
1419        0,
1420        MatGetBlockSize_MPIAIJ,
1421        0,
1422        0,
1423        0,
1424        0,
1425        MatFDColoringCreate_MPIAIJ,
1426        0,
1427        MatSetUnfactored_MPIAIJ,
1428        0,
1429        0,
1430        MatGetSubMatrix_MPIAIJ,
1431        MatDestroy_MPIAIJ,
1432        MatView_MPIAIJ,
1433        MatGetPetscMaps_Petsc,
1434        0,
1435        0,
1436        0,
1437        0,
1438        0,
1439        0,
1440        0,
1441        0,
1442        MatSetColoring_MPIAIJ,
1443        MatSetValuesAdic_MPIAIJ,
1444        MatSetValuesAdifor_MPIAIJ
1445 };
1446 
1447 /* ----------------------------------------------------------------------------------------*/
1448 
1449 EXTERN_C_BEGIN
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1452 int MatStoreValues_MPIAIJ(Mat mat)
1453 {
1454   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1455   int        ierr;
1456 
1457   PetscFunctionBegin;
1458   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1459   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 EXTERN_C_END
1463 
1464 EXTERN_C_BEGIN
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1467 int MatRetrieveValues_MPIAIJ(Mat mat)
1468 {
1469   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1470   int        ierr;
1471 
1472   PetscFunctionBegin;
1473   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1474   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 EXTERN_C_END
1478 
1479 #include "petscpc.h"
1480 EXTERN_C_BEGIN
1481 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1482 EXTERN_C_END
1483 
1484 EXTERN_C_BEGIN
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatCreate_MPIAIJ"
1487 int MatCreate_MPIAIJ(Mat B)
1488 {
1489   Mat_MPIAIJ   *b;
1490   int          ierr,i,size;
1491 
1492   PetscFunctionBegin;
1493   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1494 
1495   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1496   B->data         = (void*)b;
1497   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1498   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1499   B->factor       = 0;
1500   B->assembled    = PETSC_FALSE;
1501   B->mapping      = 0;
1502 
1503   B->insertmode      = NOT_SET_VALUES;
1504   b->size            = size;
1505   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1506 
1507   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1508   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1509 
1510   /* the information in the maps duplicates the information computed below, eventually
1511      we should remove the duplicate information that is not contained in the maps */
1512   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1513   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1514 
1515   /* build local table of row and column ownerships */
1516   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1517   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1518   b->cowners = b->rowners + b->size + 2;
1519   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1520   b->rowners[0] = 0;
1521   for (i=2; i<=b->size; i++) {
1522     b->rowners[i] += b->rowners[i-1];
1523   }
1524   b->rstart = b->rowners[b->rank];
1525   b->rend   = b->rowners[b->rank+1];
1526   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1527   b->cowners[0] = 0;
1528   for (i=2; i<=b->size; i++) {
1529     b->cowners[i] += b->cowners[i-1];
1530   }
1531   b->cstart = b->cowners[b->rank];
1532   b->cend   = b->cowners[b->rank+1];
1533 
1534   /* build cache (stash) for matrix entries destined for other processes */
1535   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1536   b->donotstash  = PETSC_FALSE;
1537   b->colmap      = 0;
1538   b->garray      = 0;
1539   b->roworiented = PETSC_TRUE;
1540 
1541   /* stuff used for matrix vector multiply */
1542   b->lvec      = PETSC_NULL;
1543   b->Mvctx     = PETSC_NULL;
1544 
1545   /* stuff for MatGetRow() */
1546   b->rowindices   = 0;
1547   b->rowvalues    = 0;
1548   b->getrowactive = PETSC_FALSE;
1549 
1550   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1551                                      "MatStoreValues_MPIAIJ",
1552                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1553   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1554                                      "MatRetrieveValues_MPIAIJ",
1555                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1556   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1557                                      "MatGetDiagonalBlock_MPIAIJ",
1558                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 EXTERN_C_END
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1565 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1566 {
1567   Mat        mat;
1568   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1569   int        ierr;
1570 
1571   PetscFunctionBegin;
1572   *newmat       = 0;
1573   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1574   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1575   a    = (Mat_MPIAIJ*)mat->data;
1576   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1577   mat->factor       = matin->factor;
1578   mat->assembled    = PETSC_TRUE;
1579   mat->insertmode   = NOT_SET_VALUES;
1580   mat->preallocated = PETSC_TRUE;
1581 
1582   a->rstart       = oldmat->rstart;
1583   a->rend         = oldmat->rend;
1584   a->cstart       = oldmat->cstart;
1585   a->cend         = oldmat->cend;
1586   a->size         = oldmat->size;
1587   a->rank         = oldmat->rank;
1588   a->donotstash   = oldmat->donotstash;
1589   a->roworiented  = oldmat->roworiented;
1590   a->rowindices   = 0;
1591   a->rowvalues    = 0;
1592   a->getrowactive = PETSC_FALSE;
1593 
1594   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1595   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1596   if (oldmat->colmap) {
1597 #if defined (PETSC_USE_CTABLE)
1598     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1599 #else
1600     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1601     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1602     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1603 #endif
1604   } else a->colmap = 0;
1605   if (oldmat->garray) {
1606     int len;
1607     len  = oldmat->B->n;
1608     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1609     PetscLogObjectMemory(mat,len*sizeof(int));
1610     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1611   } else a->garray = 0;
1612 
1613   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1614   PetscLogObjectParent(mat,a->lvec);
1615   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1616   PetscLogObjectParent(mat,a->Mvctx);
1617   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1618   PetscLogObjectParent(mat,a->A);
1619   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1620   PetscLogObjectParent(mat,a->B);
1621   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1622   *newmat = mat;
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 #include "petscsys.h"
1627 
1628 EXTERN_C_BEGIN
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatLoad_MPIAIJ"
1631 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1632 {
1633   Mat          A;
1634   PetscScalar  *vals,*svals;
1635   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1636   MPI_Status   status;
1637   int          i,nz,ierr,j,rstart,rend,fd;
1638   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1639   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1640   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1641 
1642   PetscFunctionBegin;
1643   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1644   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1645   if (!rank) {
1646     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1647     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1648     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1649     if (header[3] < 0) {
1650       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1651     }
1652   }
1653 
1654   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1655   M = header[1]; N = header[2];
1656   /* determine ownership of all rows */
1657   m = M/size + ((M % size) > rank);
1658   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1659   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1660   rowners[0] = 0;
1661   for (i=2; i<=size; i++) {
1662     rowners[i] += rowners[i-1];
1663   }
1664   rstart = rowners[rank];
1665   rend   = rowners[rank+1];
1666 
1667   /* distribute row lengths to all processors */
1668   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1669   offlens = ourlens + (rend-rstart);
1670   if (!rank) {
1671     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1672     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1673     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1674     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1675     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1676     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1677   } else {
1678     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1679   }
1680 
1681   if (!rank) {
1682     /* calculate the number of nonzeros on each processor */
1683     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1684     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1685     for (i=0; i<size; i++) {
1686       for (j=rowners[i]; j< rowners[i+1]; j++) {
1687         procsnz[i] += rowlengths[j];
1688       }
1689     }
1690     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1691 
1692     /* determine max buffer needed and allocate it */
1693     maxnz = 0;
1694     for (i=0; i<size; i++) {
1695       maxnz = PetscMax(maxnz,procsnz[i]);
1696     }
1697     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1698 
1699     /* read in my part of the matrix column indices  */
1700     nz   = procsnz[0];
1701     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1702     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1703 
1704     /* read in everyone else's part and ship it off */
1705     for (i=1; i<size; i++) {
1706       nz   = procsnz[i];
1707       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1708       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1709     }
1710     ierr = PetscFree(cols);CHKERRQ(ierr);
1711   } else {
1712     /* determine buffer space needed for message */
1713     nz = 0;
1714     for (i=0; i<m; i++) {
1715       nz += ourlens[i];
1716     }
1717     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1718 
1719     /* receive message of column indices */
1720     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1721     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1722     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1723   }
1724 
1725   /* determine column ownership if matrix is not square */
1726   if (N != M) {
1727     n      = N/size + ((N % size) > rank);
1728     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1729     cstart = cend - n;
1730   } else {
1731     cstart = rstart;
1732     cend   = rend;
1733     n      = cend - cstart;
1734   }
1735 
1736   /* loop over local rows, determining number of off diagonal entries */
1737   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1738   jj = 0;
1739   for (i=0; i<m; i++) {
1740     for (j=0; j<ourlens[i]; j++) {
1741       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1742       jj++;
1743     }
1744   }
1745 
1746   /* create our matrix */
1747   for (i=0; i<m; i++) {
1748     ourlens[i] -= offlens[i];
1749   }
1750   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1751   A = *newmat;
1752   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1753   for (i=0; i<m; i++) {
1754     ourlens[i] += offlens[i];
1755   }
1756 
1757   if (!rank) {
1758     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1759 
1760     /* read in my part of the matrix numerical values  */
1761     nz   = procsnz[0];
1762     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1763 
1764     /* insert into matrix */
1765     jj      = rstart;
1766     smycols = mycols;
1767     svals   = vals;
1768     for (i=0; i<m; i++) {
1769       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1770       smycols += ourlens[i];
1771       svals   += ourlens[i];
1772       jj++;
1773     }
1774 
1775     /* read in the other processors' parts and ship them out */
1776     for (i=1; i<size; i++) {
1777       nz   = procsnz[i];
1778       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1779       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1780     }
1781     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1782   } else {
1783     /* receive numeric values */
1784     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1785 
1786     /* receive message of values */
1787     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1788     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1789     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1790 
1791     /* insert into matrix */
1792     jj      = rstart;
1793     smycols = mycols;
1794     svals   = vals;
1795     for (i=0; i<m; i++) {
1796       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1797       smycols += ourlens[i];
1798       svals   += ourlens[i];
1799       jj++;
1800     }
1801   }
1802   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1803   ierr = PetscFree(vals);CHKERRQ(ierr);
1804   ierr = PetscFree(mycols);CHKERRQ(ierr);
1805   ierr = PetscFree(rowners);CHKERRQ(ierr);
1806 
1807   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1808   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1815 /*
1816     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1817   matrix, and then the end result obtained by concatenating the local matrices.
1818   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1819 */
1820 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1821 {
1822   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1823   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1824   Mat          *local,M,Mreuse;
1825   PetscScalar  *vwork,*aa;
1826   MPI_Comm     comm = mat->comm;
1827   Mat_SeqAIJ   *aij;
1828 
1829 
1830   PetscFunctionBegin;
1831   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1832   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1833 
1834   if (call ==  MAT_REUSE_MATRIX) {
1835     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1836     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1837     local = &Mreuse;
1838     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1839   } else {
1840     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1841     Mreuse = *local;
1842     ierr   = PetscFree(local);CHKERRQ(ierr);
1843   }
1844 
1845   /*
1846       m - number of local rows
1847       n - number of columns (same on all processors)
1848       rstart - first row in new global matrix generated
1849   */
1850   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1851   if (call == MAT_INITIAL_MATRIX) {
1852     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1853     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1854     ii  = aij->i;
1855     jj  = aij->j;
1856 
1857     /*
1858         Determine the number of non-zeros in the diagonal and off-diagonal
1859         portions of the matrix in order to do correct preallocation
1860     */
1861 
1862     /* first get start and end of "diagonal" columns */
1863     if (csize == PETSC_DECIDE) {
1864       nlocal = n/size + ((n % size) > rank);
1865     } else {
1866       nlocal = csize;
1867     }
1868     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1869     rstart = rend - nlocal;
1870     if (rank == size - 1 && rend != n) {
1871       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1872     }
1873 
1874     /* next, compute all the lengths */
1875     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1876     olens = dlens + m;
1877     for (i=0; i<m; i++) {
1878       jend = ii[i+1] - ii[i];
1879       olen = 0;
1880       dlen = 0;
1881       for (j=0; j<jend; j++) {
1882         if (*jj < rstart || *jj >= rend) olen++;
1883         else dlen++;
1884         jj++;
1885       }
1886       olens[i] = olen;
1887       dlens[i] = dlen;
1888     }
1889     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1890     ierr = PetscFree(dlens);CHKERRQ(ierr);
1891   } else {
1892     int ml,nl;
1893 
1894     M = *newmat;
1895     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1896     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1897     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1898     /*
1899          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1900        rather than the slower MatSetValues().
1901     */
1902     M->was_assembled = PETSC_TRUE;
1903     M->assembled     = PETSC_FALSE;
1904   }
1905   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1906   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1907   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1908   ii  = aij->i;
1909   jj  = aij->j;
1910   aa  = aij->a;
1911   for (i=0; i<m; i++) {
1912     row   = rstart + i;
1913     nz    = ii[i+1] - ii[i];
1914     cwork = jj;     jj += nz;
1915     vwork = aa;     aa += nz;
1916     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1917   }
1918 
1919   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1920   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1921   *newmat = M;
1922 
1923   /* save submatrix used in processor for next request */
1924   if (call ==  MAT_INITIAL_MATRIX) {
1925     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1926     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1927   }
1928 
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1934 /*@C
1935    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1936    (the default parallel PETSc format).  For good matrix assembly performance
1937    the user should preallocate the matrix storage by setting the parameters
1938    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1939    performance can be increased by more than a factor of 50.
1940 
1941    Collective on MPI_Comm
1942 
1943    Input Parameters:
1944 +  A - the matrix
1945 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1946            (same value is used for all local rows)
1947 .  d_nnz - array containing the number of nonzeros in the various rows of the
1948            DIAGONAL portion of the local submatrix (possibly different for each row)
1949            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1950            The size of this array is equal to the number of local rows, i.e. 'm'.
1951            You must leave room for the diagonal entry even if it is zero.
1952 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1953            submatrix (same value is used for all local rows).
1954 -  o_nnz - array containing the number of nonzeros in the various rows of the
1955            OFF-DIAGONAL portion of the local submatrix (possibly different for
1956            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1957            structure. The size of this array is equal to the number
1958            of local rows, i.e. 'm'.
1959 
1960    The AIJ format (also called the Yale sparse matrix format or
1961    compressed row storage), is fully compatible with standard Fortran 77
1962    storage.  That is, the stored row and column indices can begin at
1963    either one (as in Fortran) or zero.  See the users manual for details.
1964 
1965    The user MUST specify either the local or global matrix dimensions
1966    (possibly both).
1967 
1968    The parallel matrix is partitioned such that the first m0 rows belong to
1969    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1970    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
1971 
1972    The DIAGONAL portion of the local submatrix of a processor can be defined
1973    as the submatrix obtained by extracting the part corresponding
1974    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1975    first row that belongs to the processor and r2 is the last row belonging
1976    to this processor. This is a square mxm matrix. The remaining portion
1977    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1978 
1979    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
1980 
1981    By default, this format uses inodes (identical nodes) when possible.
1982    We search for consecutive rows with the same nonzero structure, thereby
1983    reusing matrix information to achieve increased efficiency.
1984 
1985    Options Database Keys:
1986 +  -mat_aij_no_inode  - Do not use inodes
1987 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1988 -  -mat_aij_oneindex - Internally use indexing starting at 1
1989         rather than 0.  Note that when calling MatSetValues(),
1990         the user still MUST index entries starting at 0!
1991 
1992    Example usage:
1993 
1994    Consider the following 8x8 matrix with 34 non-zero values, that is
1995    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1996    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1997    as follows:
1998 
1999 .vb
2000             1  2  0  |  0  3  0  |  0  4
2001     Proc0   0  5  6  |  7  0  0  |  8  0
2002             9  0 10  | 11  0  0  | 12  0
2003     -------------------------------------
2004            13  0 14  | 15 16 17  |  0  0
2005     Proc1   0 18  0  | 19 20 21  |  0  0
2006             0  0  0  | 22 23  0  | 24  0
2007     -------------------------------------
2008     Proc2  25 26 27  |  0  0 28  | 29  0
2009            30  0  0  | 31 32 33  |  0 34
2010 .ve
2011 
2012    This can be represented as a collection of submatrices as:
2013 
2014 .vb
2015       A B C
2016       D E F
2017       G H I
2018 .ve
2019 
2020    Where the submatrices A,B,C are owned by proc0, D,E,F are
2021    owned by proc1, G,H,I are owned by proc2.
2022 
2023    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2024    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2025    The 'M','N' parameters are 8,8, and have the same values on all procs.
2026 
2027    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2028    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2029    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2030    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2031    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2032    matrix, and [DF] as another SeqAIJ matrix.
2033 
2034    When d_nz, o_nz parameters are specified, d_nz storage elements are
2035    allocated for every row of the local diagonal submatrix, and o_nz
2036    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2037    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2038    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2039    In this case, the values of d_nz,o_nz are:
2040 .vb
2041      proc0 : d_nz = 2, o_nz = 2
2042      proc1 : d_nz = 3, o_nz = 2
2043      proc2 : d_nz = 1, o_nz = 4
2044 .ve
2045    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2046    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2047    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2048    34 values.
2049 
2050    When d_nnz, o_nnz parameters are specified, the storage is specified
2051    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2052    In the above case the values for d_nnz,o_nnz are:
2053 .vb
2054      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2055      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2056      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2057 .ve
2058    Here the space allocated is the sum of all the above values, i.e. 34, and
2059    hence pre-allocation is perfect.
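
   As a minimal illustrative sketch (not from an actual PETSc example), proc0 in
   the scenario above could preallocate with the per-row arrays listed above;
   comm, A and ierr are assumed to be declared already, and error handling is
   limited to CHKERRQ():

.vb
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     /* d_nz and o_nz (here 0) are ignored because d_nnz and o_nnz are given */
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve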
2060 
2061    Level: intermediate
2062 
2063 .keywords: matrix, aij, compressed row, sparse, parallel
2064 
2065 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2066 @*/
2067 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2068 {
2069   Mat_MPIAIJ   *b;
2070   int          ierr,i;
2071   PetscTruth   flg2;
2072 
2073   PetscFunctionBegin;
2074   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2075   if (!flg2) PetscFunctionReturn(0);
2076   B->preallocated = PETSC_TRUE;
2077   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2078   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2079   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2080   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2081   if (d_nnz) {
2082     for (i=0; i<B->m; i++) {
2083       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2084     }
2085   }
2086   if (o_nnz) {
2087     for (i=0; i<B->m; i++) {
2088       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2089     }
2090   }
2091   b = (Mat_MPIAIJ*)B->data;
2092 
2093   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2094   PetscLogObjectParent(B,b->A);
2095   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2096   PetscLogObjectParent(B,b->B);
2097 
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatCreateMPIAIJ"
2103 /*@C
2104    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2105    (the default parallel PETSc format).  For good matrix assembly performance
2106    the user should preallocate the matrix storage by setting the parameters
2107    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2108    performance can be increased by more than a factor of 50.
2109 
2110    Collective on MPI_Comm
2111 
2112    Input Parameters:
2113 +  comm - MPI communicator
2114 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2115            This value should be the same as the local size used in creating the
2116            y vector for the matrix-vector product y = Ax.
2117 .  n - This value should be the same as the local size used in creating the
2118        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2119        calculated if N is given). For square matrices n is almost always m.
2120 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2121 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2122 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2123            (same value is used for all local rows)
2124 .  d_nnz - array containing the number of nonzeros in the various rows of the
2125            DIAGONAL portion of the local submatrix (possibly different for each row)
2126            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2127            The size of this array is equal to the number of local rows, i.e. 'm'.
2128            You must leave room for the diagonal entry even if it is zero.
2129 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2130            submatrix (same value is used for all local rows).
2131 -  o_nnz - array containing the number of nonzeros in the various rows of the
2132            OFF-DIAGONAL portion of the local submatrix (possibly different for
2133            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2134            structure. The size of this array is equal to the number
2135            of local rows, i.e. 'm'.
2136 
2137    Output Parameter:
2138 .  A - the matrix
2139 
2140    Notes:
2141    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2142    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2143    storage requirements for this matrix.
2144 
2145    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2146    processor then it must be used on all processors that share the object for
2147    that argument.
2148 
2149    The AIJ format (also called the Yale sparse matrix format or
2150    compressed row storage), is fully compatible with standard Fortran 77
2151    storage.  That is, the stored row and column indices can begin at
2152    either one (as in Fortran) or zero.  See the users manual for details.
2153 
2154    The user MUST specify either the local or global matrix dimensions
2155    (possibly both).
2156 
2157    The parallel matrix is partitioned such that the first m0 rows belong to
2158    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2159    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
2160 
2161    The DIAGONAL portion of the local submatrix of a processor can be defined
2162    as the submatrix obtained by extracting the part corresponding
2163    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2164    first row that belongs to the processor and r2 is the last row belonging
2165    to this processor. This is a square mxm matrix. The remaining portion
2166    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2167 
2168    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2169 
2170    By default, this format uses inodes (identical nodes) when possible.
2171    We search for consecutive rows with the same nonzero structure, thereby
2172    reusing matrix information to achieve increased efficiency.
2173 
2174    Options Database Keys:
2175 +  -mat_aij_no_inode  - Do not use inodes
2176 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2177 -  -mat_aij_oneindex - Internally use indexing starting at 1
2178         rather than 0.  Note that when calling MatSetValues(),
2179         the user still MUST index entries starting at 0!
2180 
2181 
2182    Example usage:
2183 
2184    Consider the following 8x8 matrix with 34 non-zero values, that is
2185    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2186    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2187    as follows:
2188 
2189 .vb
2190             1  2  0  |  0  3  0  |  0  4
2191     Proc0   0  5  6  |  7  0  0  |  8  0
2192             9  0 10  | 11  0  0  | 12  0
2193     -------------------------------------
2194            13  0 14  | 15 16 17  |  0  0
2195     Proc1   0 18  0  | 19 20 21  |  0  0
2196             0  0  0  | 22 23  0  | 24  0
2197     -------------------------------------
2198     Proc2  25 26 27  |  0  0 28  | 29  0
2199            30  0  0  | 31 32 33  |  0 34
2200 .ve
2201 
2202    This can be represented as a collection of submatrices as:
2203 
2204 .vb
2205       A B C
2206       D E F
2207       G H I
2208 .ve
2209 
2210    Where the submatrices A,B,C are owned by proc0, D,E,F are
2211    owned by proc1, G,H,I are owned by proc2.
2212 
2213    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2214    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2215    The 'M','N' parameters are 8,8, and have the same values on all procs.
2216 
2217    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2218    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2219    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2220    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2221    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2222    matrix, and [DF] as another SeqAIJ matrix.
2223 
2224    When d_nz, o_nz parameters are specified, d_nz storage elements are
2225    allocated for every row of the local diagonal submatrix, and o_nz
2226    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2227    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2228    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2229    In this case, the values of d_nz,o_nz are:
2230 .vb
2231      proc0 : d_nz = 2, o_nz = 2
2232      proc1 : d_nz = 3, o_nz = 2
2233      proc2 : d_nz = 1, o_nz = 4
2234 .ve
2235    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2236    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2237    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2238    34 values.
2239 
2240    When d_nnz, o_nnz parameters are specified, the storage is specified
2241    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2242    In the above case the values for d_nnz,o_nnz are:
2243 .vb
2244      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2245      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2246      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2247 .ve
2248    Here the space allocated is the sum of all the above values, i.e. 34, and
2249    hence pre-allocation is perfect.
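
   As a minimal illustrative sketch (not from an actual PETSc example), the
   corresponding single call on proc0 of the scenario above, assuming comm, A and
   ierr are declared already, would look like:

.vb
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     /* d_nz and o_nz (here 0) are ignored because d_nnz and o_nnz are given */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve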
2250 
2251    Level: intermediate
2252 
2253 .keywords: matrix, aij, compressed row, sparse, parallel
2254 
2255 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2256 @*/
2257 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2258 {
2259   int ierr,size;
2260 
2261   PetscFunctionBegin;
2262   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2263   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2264   if (size > 1) {
2265     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2266     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2267   } else {
2268     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2269     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
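/*
    MatMPIAIJGetSeqAIJ - returns the local diagonal block Ad, the local
  off-diagonal block Ao, and colmap, the map from the compressed local column
  numbering of Ao to global column numbers (the garray field of Mat_MPIAIJ).
*/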
2276 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2277 {
2278   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2279   PetscFunctionBegin;
2280   *Ad     = a->A;
2281   *Ao     = a->B;
2282   *colmap = a->garray;
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2288 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2289 {
2290   int        ierr;
2291   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2292 
2293   PetscFunctionBegin;
2294   if (coloring->ctype == IS_COLORING_LOCAL) {
2295     int        *allcolors,*colors,i;
2296     ISColoring ocoloring;
2297 
2298     /* set coloring for diagonal portion */
2299     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2300 
2301     /* set coloring for off-diagonal portion */
2302     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2303     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2304     for (i=0; i<a->B->n; i++) {
2305       colors[i] = allcolors[a->garray[i]];
2306     }
2307     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2308     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2309     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2310     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2311   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2312     int        *colors,i,*larray;
2313     ISColoring ocoloring;
2314 
2315     /* set coloring for diagonal portion */
2316     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2317     for (i=0; i<a->A->n; i++) {
2318       larray[i] = i + a->cstart;
2319     }
2320     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2321     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2322     for (i=0; i<a->A->n; i++) {
2323       colors[i] = coloring->colors[larray[i]];
2324     }
2325     ierr = PetscFree(larray);CHKERRQ(ierr);
2326     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2327     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2328     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2329 
2330     /* set coloring for off-diagonal portion */
2331     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2332     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2333     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2334     for (i=0; i<a->B->n; i++) {
2335       colors[i] = coloring->colors[larray[i]];
2336     }
2337     ierr = PetscFree(larray);CHKERRQ(ierr);
2338     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2339     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2340     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2341   } else {
2342     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2343   }
2344 
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2350 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2351 {
2352   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2353   int        ierr;
2354 
2355   PetscFunctionBegin;
2356   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2357   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2363 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2364 {
2365   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2366   int        ierr;
2367 
2368   PetscFunctionBegin;
2369   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2370   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373