1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
19 at a slightly higher hash table lookup cost; without it, it is not scalable
20 (each processor holds an order N integer array) but access is fast.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
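
/*
   Illustrative sketch (not compiled into the library): how the colmap built above is
   typically consulted to translate a global column number gcol into a local column
   index of the off-diagonal block B.  This mirrors the lookups done in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; the helper name and its
   arguments are hypothetical.
*/
#if 0
static int ExampleGlobalToLocalColumn_MPIAIJ(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {
    ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
  }
#if defined (PETSC_USE_CTABLE)
  /* keys and values are stored shifted by one so that 0 can mean "not found" */
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;   /* a result of -1 means the column is not in B */
#endif
  PetscFunctionReturn(0);
}
#endif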
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
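
/*
   Illustrative sketch (not compiled into the library): the insertion logic that the
   macro above implements, written out as a plain function for one row stored as a
   sorted array of column indices.  The CHUNKSIZE reallocation is omitted and the
   function name and arguments are hypothetical; only the search/insert step is shown.
*/
#if 0
static int ExampleInsertIntoSortedRow(int *cols,PetscScalar *vals,int *nrow,int rmax,
                                      int col,PetscScalar value,InsertMode addv)
{
  int i,t,low = 0,high = *nrow;

  /* binary search narrows the window, then a short linear scan finishes it off */
  while (high-low > 5) {
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;
    if (cols[i] == col) {              /* existing nonzero: add to it or overwrite it */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return 0;
    }
  }
  if (*nrow >= rmax) return 1;         /* row full; the macro enlarges the storage here */
  for (t=*nrow-1; t>=i; t--) {         /* shift the later entries up one slot */
    cols[t+1] = cols[t]; vals[t+1] = vals[t];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
  return 0;
}
#endif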
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
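
/*
   Illustrative sketch (not compiled into the library): typical caller-side use of the
   routine above.  MatSetValues() routes each entry of an MPIAIJ matrix to the diagonal
   block A, the off-diagonal block B, or (for rows owned by another process) the stash;
   MatAssemblyBegin/End() then deliver the stashed entries to their owners.  The matrix
   size, preallocation, values and helper name below are made up for the example.
*/
#if 0
static int ExampleAssembleMPIAIJ(MPI_Comm comm)
{
  Mat         C;
  int         ierr,i,rstart,rend,N = 100;
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,2,PETSC_NULL,1,PETSC_NULL,&C);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    int col = (i+1) % N;                 /* may land in A or in B; rows are always local here */
    ierr = MatSetValues(C,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValues(C,1,&i,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif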
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col --;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if the nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled, so this check (an MPI_PROD reduction of was_assembled) can be skipped
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* unpack the received row indices into the local list */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal we use separate
540      code so that MatSetValues() is not called for each diagonal entry,
541      which would trigger many mallocs and slow things down.
542 
543        Contributed by: Mathew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
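
/*
   Illustrative sketch (not compiled into the library): caller-side use of the routine
   above.  Each process may list any global rows, not only the ones it owns; the rows
   are forwarded to their owners and, when a diagonal value is supplied and the diagonal
   block is square, it is placed on the diagonal of every zeroed row.  The helper name
   and the choice of row below are arbitrary.
*/
#if 0
static int ExampleZeroRowsMPIAIJ(Mat C)
{
  IS          rows;
  int         ierr,rstart,rend,r;
  PetscScalar diag = 1.0;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);  /* rows rstart..rend-1 are local */
  r    = rstart;                                               /* zero the first locally owned row */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,1,&r,&rows);CHKERRQ(ierr);
  ierr = MatZeroRows(C,rows,&diag);CHKERRQ(ierr);
  ierr = ISDestroy(rows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif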
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
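
/*
   Illustrative sketch (not compiled into the library): the multiply above overlaps the
   scatter of the off-processor entries of xx into a->lvec with the multiply by the
   diagonal block, computing yy = A_d*x_local + B_o*x_ghost.  A caller only sees
   MatMult(); the helper name and test vector are made up, and VecCreateMPI()/VecSet()
   are assumed to follow the calling sequences of this PETSc version.
*/
#if 0
static int ExampleMultMPIAIJ(Mat C)
{
  Vec         x,y;
  int         ierr,m,n,M,N;
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  ierr = VecCreateMPI(C->comm,n,N,&x);CHKERRQ(ierr);    /* layout matches the columns of C */
  ierr = VecCreateMPI(C->comm,m,M,&y);CHKERRQ(ierr);    /* layout matches the rows of C */
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(C,x,y);CHKERRQ(ierr);                  /* dispatches to MatMult_MPIAIJ() */
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif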
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* inserted into yy until the next line, which is true for my implementation */
629   /* but may not always be true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually */
649   /* inserted into zz until the next line, which is true for my implementation */
650   /* but may not always be true. */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the subblock A->A is the
657    diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
722   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
723   PetscTruth        isdraw,isascii,flg;
724   PetscViewer       sviewer;
725   PetscViewerFormat format;
726 
727   PetscFunctionBegin;
728   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
730   if (isascii) {
731     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
732     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
733       MatInfo info;
734       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
735       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
736       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
737       if (flg) {
738         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
739 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
740       } else {
741         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
742 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
743       }
744       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
746       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
749       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
750       PetscFunctionReturn(0);
751     } else if (format == PETSC_VIEWER_ASCII_INFO) {
752       PetscFunctionReturn(0);
753     }
754   } else if (isdraw) {
755     PetscDraw       draw;
756     PetscTruth isnull;
757     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
758     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
759   }
760 
761   if (size == 1) {
762     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
763     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
764   } else {
765     /* assemble the entire matrix onto first processor. */
766     Mat         A;
767     Mat_SeqAIJ *Aloc;
768     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
769     PetscScalar *a;
770 
771     if (!rank) {
772       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
773     } else {
774       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
775     }
776     PetscLogObjectParent(mat,A);
777 
778     /* copy over the A part */
779     Aloc = (Mat_SeqAIJ*)aij->A->data;
780     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781     row = aij->rstart;
782     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
783     for (i=0; i<m; i++) {
784       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
785       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
786     }
787     aj = Aloc->j;
788     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
789 
790     /* copy over the B part */
791     Aloc = (Mat_SeqAIJ*)aij->B->data;
792     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
793     row  = aij->rstart;
794     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
795     ct   = cols;
796     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
797     for (i=0; i<m; i++) {
798       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800     }
801     ierr = PetscFree(ct);CHKERRQ(ierr);
802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     /*
805        Everyone has to participate in drawing the matrix since the graphics waits are
806        synchronized across all processors that share the PetscDraw object
807     */
808     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
809     if (!rank) {
810       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
811       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
812     }
813     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
814     ierr = MatDestroy(A);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatView_MPIAIJ"
821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
822 {
823   int        ierr;
824   PetscTruth isascii,isdraw,issocket,isbinary;
825 
826   PetscFunctionBegin;
827   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
828   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
829   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
830   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
831   if (isascii || isdraw || isbinary || issocket) {
832     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
833   } else {
834     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 
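
/*
    Note on the local relaxation below: on each process the parallel matrix is viewed
    as [A_d  B_o], where A_d is the diagonal block and B_o the off-diagonal block.
    For the SOR_LOCAL_* variants each outer iteration scatters the current ghost
    values of xx into mat->lvec, forms the modified right hand side
         bb1 = bb - B_o * x_ghost,
    and then performs lits sweeps of the requested type on A_d only; no relaxation
    is performed across process boundaries.
*/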
841 #undef __FUNCT__
842 #define __FUNCT__ "MatRelax_MPIAIJ"
843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
844 {
845   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
846   int          ierr;
847   Vec          bb1;
848   PetscScalar  mone=-1.0;
849 
850   PetscFunctionBegin;
851 
852   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
853 
854   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
855     if (flag & SOR_ZERO_INITIAL_GUESS) {
856       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
857       its--;
858     }
859 
860     while (its--) {
861       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
862       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
863 
864       /* update rhs: bb1 = bb - B*x */
865       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
866       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
867 
868       /* local sweep */
869       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
870       CHKERRQ(ierr);
871     }
872   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
873     if (flag & SOR_ZERO_INITIAL_GUESS) {
874       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
875       its--;
876     }
877     while (its--) {
878       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
879       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880 
881       /* update rhs: bb1 = bb - B*x */
882       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
883       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
884 
885       /* local sweep */
886       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
887       CHKERRQ(ierr);
888     }
889   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
890     if (flag & SOR_ZERO_INITIAL_GUESS) {
891       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
892       its--;
893     }
894     while (its--) {
895       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
896       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
897 
898       /* update rhs: bb1 = bb - B*x */
899       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
900       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
901 
902       /* local sweep */
903       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
904       CHKERRQ(ierr);
905     }
906   } else {
907     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
908   }
909 
910   ierr = VecDestroy(bb1);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatGetInfo_MPIAIJ"
916 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
917 {
918   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
919   Mat        A = mat->A,B = mat->B;
920   int        ierr;
921   PetscReal  isend[5],irecv[5];
922 
923   PetscFunctionBegin;
924   info->block_size     = 1.0;
925   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
926   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
927   isend[3] = info->memory;  isend[4] = info->mallocs;
928   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
929   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
930   isend[3] += info->memory;  isend[4] += info->mallocs;
931   if (flag == MAT_LOCAL) {
932     info->nz_used      = isend[0];
933     info->nz_allocated = isend[1];
934     info->nz_unneeded  = isend[2];
935     info->memory       = isend[3];
936     info->mallocs      = isend[4];
937   } else if (flag == MAT_GLOBAL_MAX) {
938     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
939     info->nz_used      = irecv[0];
940     info->nz_allocated = irecv[1];
941     info->nz_unneeded  = irecv[2];
942     info->memory       = irecv[3];
943     info->mallocs      = irecv[4];
944   } else if (flag == MAT_GLOBAL_SUM) {
945     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
946     info->nz_used      = irecv[0];
947     info->nz_allocated = irecv[1];
948     info->nz_unneeded  = irecv[2];
949     info->memory       = irecv[3];
950     info->mallocs      = irecv[4];
951   }
952   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
953   info->fill_ratio_needed = 0;
954   info->factor_mallocs    = 0;
955   info->rows_global       = (double)matin->M;
956   info->columns_global    = (double)matin->N;
957   info->rows_local        = (double)matin->m;
958   info->columns_local     = (double)matin->N;
959 
960   PetscFunctionReturn(0);
961 }
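
/*
   Illustrative sketch (not compiled into the library): the routine above adds the
   statistics of the local A and B blocks and, for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM,
   reduces them over the communicator.  A caller typically does the following; the
   helper name is made up.
*/
#if 0
static int ExampleGetInfoMPIAIJ(Mat C)
{
  MatInfo info;
  int     ierr;

  PetscFunctionBegin;
  ierr = MatGetInfo(C,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  /* info.nz_used now holds the total number of stored nonzeros over all processes */
  PetscFunctionReturn(0);
}
#endif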
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatSetOption_MPIAIJ"
965 int MatSetOption_MPIAIJ(Mat A,MatOption op)
966 {
967   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
968   int        ierr;
969 
970   PetscFunctionBegin;
971   switch (op) {
972   case MAT_NO_NEW_NONZERO_LOCATIONS:
973   case MAT_YES_NEW_NONZERO_LOCATIONS:
974   case MAT_COLUMNS_UNSORTED:
975   case MAT_COLUMNS_SORTED:
976   case MAT_NEW_NONZERO_ALLOCATION_ERR:
977   case MAT_KEEP_ZEROED_ROWS:
978   case MAT_NEW_NONZERO_LOCATION_ERR:
979   case MAT_USE_INODES:
980   case MAT_DO_NOT_USE_INODES:
981   case MAT_IGNORE_ZERO_ENTRIES:
982     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
983     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
984     break;
985   case MAT_ROW_ORIENTED:
986     a->roworiented = PETSC_TRUE;
987     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
988     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
989     break;
990   case MAT_ROWS_SORTED:
991   case MAT_ROWS_UNSORTED:
992   case MAT_YES_NEW_DIAGONALS:
993   case MAT_USE_SINGLE_PRECISION_SOLVES:
994     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
995     break;
996   case MAT_COLUMN_ORIENTED:
997     a->roworiented = PETSC_FALSE;
998     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
999     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1000     break;
1001   case MAT_IGNORE_OFF_PROC_ENTRIES:
1002     a->donotstash = PETSC_TRUE;
1003     break;
1004   case MAT_NO_NEW_DIAGONALS:
1005     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1006     break;
1007   default:
1008     SETERRQ(PETSC_ERR_SUP,"unknown option");
1009     break;
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatGetRow_MPIAIJ"
1016 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1017 {
1018   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1020   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1021   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1022   int          *cmap,*idx_p;
1023 
1024   PetscFunctionBegin;
1025   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1026   mat->getrowactive = PETSC_TRUE;
1027 
1028   if (!mat->rowvalues && (idx || v)) {
1029     /*
1030         allocate enough space to hold information from the longest row.
1031     */
1032     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1033     int     max = 1,tmp;
1034     for (i=0; i<matin->m; i++) {
1035       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1036       if (max < tmp) { max = tmp; }
1037     }
1038     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1039     mat->rowindices = (int*)(mat->rowvalues + max);
1040   }
1041 
1042   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1043   lrow = row - rstart;
1044 
1045   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1046   if (!v)   {pvA = 0; pvB = 0;}
1047   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1048   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1049   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1050   nztot = nzA + nzB;
1051 
1052   cmap  = mat->garray;
1053   if (v  || idx) {
1054     if (nztot) {
1055       /* Sort by increasing column numbers, assuming A and B already sorted */
1056       int imark = -1;
1057       if (v) {
1058         *v = v_p = mat->rowvalues;
1059         for (i=0; i<nzB; i++) {
1060           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1061           else break;
1062         }
1063         imark = i;
1064         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1065         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1066       }
1067       if (idx) {
1068         *idx = idx_p = mat->rowindices;
1069         if (imark > -1) {
1070           for (i=0; i<imark; i++) {
1071             idx_p[i] = cmap[cworkB[i]];
1072           }
1073         } else {
1074           for (i=0; i<nzB; i++) {
1075             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1076             else break;
1077           }
1078           imark = i;
1079         }
1080         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1081         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1082       }
1083     } else {
1084       if (idx) *idx = 0;
1085       if (v)   *v   = 0;
1086     }
1087   }
1088   *nz = nztot;
1089   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1090   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1096 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1097 {
1098   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1099 
1100   PetscFunctionBegin;
1101   if (aij->getrowactive == PETSC_FALSE) {
1102     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1103   }
1104   aij->getrowactive = PETSC_FALSE;
1105   PetscFunctionReturn(0);
1106 }
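
/*
   Illustrative sketch (not compiled into the library): caller-side use of
   MatGetRow()/MatRestoreRow() on an MPIAIJ matrix.  Only locally owned rows may be
   requested; MatGetRow_MPIAIJ() above merges the A and B parts of the row into one
   list of global column indices, sorted by column.  The helper name and the choice
   of row are arbitrary.
*/
#if 0
static int ExampleGetRowMPIAIJ(Mat C)
{
  int          ierr,rstart,rend,ncols,*cols,j;
  PetscScalar  *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetRow(C,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
  for (j=0; j<ncols; j++) {
    /* cols[j] is a global column index, vals[j] the corresponding value */
  }
  ierr = MatRestoreRow(C,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif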
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatNorm_MPIAIJ"
1110 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1111 {
1112   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1113   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1114   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1115   PetscReal    sum = 0.0;
1116   PetscScalar  *v;
1117 
1118   PetscFunctionBegin;
1119   if (aij->size == 1) {
1120     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1121   } else {
1122     if (type == NORM_FROBENIUS) {
1123       v = amat->a;
1124       for (i=0; i<amat->nz; i++) {
1125 #if defined(PETSC_USE_COMPLEX)
1126         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1127 #else
1128         sum += (*v)*(*v); v++;
1129 #endif
1130       }
1131       v = bmat->a;
1132       for (i=0; i<bmat->nz; i++) {
1133 #if defined(PETSC_USE_COMPLEX)
1134         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1135 #else
1136         sum += (*v)*(*v); v++;
1137 #endif
1138       }
1139       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1140       *norm = sqrt(*norm);
1141     } else if (type == NORM_1) { /* max column norm */
1142       PetscReal *tmp,*tmp2;
1143       int    *jj,*garray = aij->garray;
1144       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1145       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1146       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1147       *norm = 0.0;
1148       v = amat->a; jj = amat->j;
1149       for (j=0; j<amat->nz; j++) {
1150         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1151       }
1152       v = bmat->a; jj = bmat->j;
1153       for (j=0; j<bmat->nz; j++) {
1154         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1155       }
1156       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1157       for (j=0; j<mat->N; j++) {
1158         if (tmp2[j] > *norm) *norm = tmp2[j];
1159       }
1160       ierr = PetscFree(tmp);CHKERRQ(ierr);
1161       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1162     } else if (type == NORM_INFINITY) { /* max row norm */
1163       PetscReal ntemp = 0.0;
1164       for (j=0; j<aij->A->m; j++) {
1165         v = amat->a + amat->i[j] + shift;
1166         sum = 0.0;
1167         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1168           sum += PetscAbsScalar(*v); v++;
1169         }
1170         v = bmat->a + bmat->i[j] + shift;
1171         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1172           sum += PetscAbsScalar(*v); v++;
1173         }
1174         if (sum > ntemp) ntemp = sum;
1175       }
1176       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1177     } else {
1178       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1179     }
1180   }
1181   PetscFunctionReturn(0);
1182 }
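
/*
   Illustrative sketch (not compiled into the library): the norms computed above are
      NORM_FROBENIUS   sqrt of the global sum of |a_ij|^2
      NORM_1           the maximum column sum of |a_ij| (column sums accumulated globally)
      NORM_INFINITY    the maximum row sum of |a_ij|    (local maxima reduced with MPI_MAX)
   A caller requests them as follows; the helper name is made up.
*/
#if 0
static int ExampleNormMPIAIJ(Mat C)
{
  int       ierr;
  PetscReal nrm1,nrmf,nrmi;

  PetscFunctionBegin;
  ierr = MatNorm(C,NORM_1,&nrm1);CHKERRQ(ierr);
  ierr = MatNorm(C,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(C,NORM_INFINITY,&nrmi);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif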
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatTranspose_MPIAIJ"
1186 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1187 {
1188   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1189   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1190   int          ierr,shift = Aloc->indexshift;
1191   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1192   Mat          B;
1193   PetscScalar  *array;
1194 
1195   PetscFunctionBegin;
1196   if (!matout && M != N) {
1197     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1198   }
1199 
1200   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1201 
1202   /* copy over the A part */
1203   Aloc = (Mat_SeqAIJ*)a->A->data;
1204   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1205   row = a->rstart;
1206   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1207   for (i=0; i<m; i++) {
1208     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1209     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1210   }
1211   aj = Aloc->j;
1212   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1213 
1214   /* copy over the B part */
1215   Aloc = (Mat_SeqAIJ*)a->B->data;
1216   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1217   row  = a->rstart;
1218   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1219   ct   = cols;
1220   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1221   for (i=0; i<m; i++) {
1222     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1223     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1224   }
1225   ierr = PetscFree(ct);CHKERRQ(ierr);
1226   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1227   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   if (matout) {
1229     *matout = B;
1230   } else {
1231     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1238 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1239 {
1240   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat        a = aij->A,b = aij->B;
1242   int        ierr,s1,s2,s3;
1243 
1244   PetscFunctionBegin;
1245   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1246   if (rr) {
1247     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1248     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1249     /* Overlap communication with computation. */
1250     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1251   }
1252   if (ll) {
1253     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1254     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1255     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1256   }
1257   /* scale  the diagonal block */
1258   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1259 
1260   if (rr) {
1261     /* Do a scatter end and then right scale the off-diagonal block */
1262     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1263     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1264   }
1265 
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1272 int MatPrintHelp_MPIAIJ(Mat A)
1273 {
1274   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1275   int        ierr;
1276 
1277   PetscFunctionBegin;
1278   if (!a->rank) {
1279     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1286 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1287 {
1288   PetscFunctionBegin;
1289   *bs = 1;
1290   PetscFunctionReturn(0);
1291 }
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1294 int MatSetUnfactored_MPIAIJ(Mat A)
1295 {
1296   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1297   int        ierr;
1298 
1299   PetscFunctionBegin;
1300   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatEqual_MPIAIJ"
1306 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1307 {
1308   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1309   Mat        a,b,c,d;
1310   PetscTruth flg;
1311   int        ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1315   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1316   a = matA->A; b = matA->B;
1317   c = matB->A; d = matB->B;
1318 
1319   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1320   if (flg == PETSC_TRUE) {
1321     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1322   }
1323   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatCopy_MPIAIJ"
1329 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1330 {
1331   int        ierr;
1332   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1333   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1334   PetscTruth flg;
1335 
1336   PetscFunctionBegin;
1337   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1338   if (str != SAME_NONZERO_PATTERN || !flg) {
1339     /* because of the column compression in the off-processor part of the matrix a->B,
1340        the number of columns in a->B and b->B may be different, hence we cannot call
1341        MatCopy() directly on the two parts. If need be, we can provide a more
1342        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1343        and then copying the submatrices */
1344     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1345   } else {
1346     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1347     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1354 int MatSetUpPreallocation_MPIAIJ(Mat A)
1355 {
1356   int        ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1364 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1365 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1366 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1367 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1368 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1369 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1370 #endif
1371 
1372 /* -------------------------------------------------------------------*/
1373 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1374        MatGetRow_MPIAIJ,
1375        MatRestoreRow_MPIAIJ,
1376        MatMult_MPIAIJ,
1377        MatMultAdd_MPIAIJ,
1378        MatMultTranspose_MPIAIJ,
1379        MatMultTransposeAdd_MPIAIJ,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        MatRelax_MPIAIJ,
1387        MatTranspose_MPIAIJ,
1388        MatGetInfo_MPIAIJ,
1389        MatEqual_MPIAIJ,
1390        MatGetDiagonal_MPIAIJ,
1391        MatDiagonalScale_MPIAIJ,
1392        MatNorm_MPIAIJ,
1393        MatAssemblyBegin_MPIAIJ,
1394        MatAssemblyEnd_MPIAIJ,
1395        0,
1396        MatSetOption_MPIAIJ,
1397        MatZeroEntries_MPIAIJ,
1398        MatZeroRows_MPIAIJ,
1399 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1400        MatLUFactorSymbolic_MPIAIJ_TFS,
1401 #else
1402        0,
1403 #endif
1404        0,
1405        0,
1406        0,
1407        MatSetUpPreallocation_MPIAIJ,
1408        0,
1409        0,
1410        0,
1411        0,
1412        MatDuplicate_MPIAIJ,
1413        0,
1414        0,
1415        0,
1416        0,
1417        0,
1418        MatGetSubMatrices_MPIAIJ,
1419        MatIncreaseOverlap_MPIAIJ,
1420        MatGetValues_MPIAIJ,
1421        MatCopy_MPIAIJ,
1422        MatPrintHelp_MPIAIJ,
1423        MatScale_MPIAIJ,
1424        0,
1425        0,
1426        0,
1427        MatGetBlockSize_MPIAIJ,
1428        0,
1429        0,
1430        0,
1431        0,
1432        MatFDColoringCreate_MPIAIJ,
1433        0,
1434        MatSetUnfactored_MPIAIJ,
1435        0,
1436        0,
1437        MatGetSubMatrix_MPIAIJ,
1438        MatDestroy_MPIAIJ,
1439        MatView_MPIAIJ,
1440        MatGetPetscMaps_Petsc,
1441        0,
1442        0,
1443        0,
1444        0,
1445        0,
1446        0,
1447        0,
1448        0,
1449        MatSetColoring_MPIAIJ,
1450        MatSetValuesAdic_MPIAIJ,
1451        MatSetValuesAdifor_MPIAIJ
1452 };
1453 
1454 /* ----------------------------------------------------------------------------------------*/
1455 
1456 EXTERN_C_BEGIN
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1459 int MatStoreValues_MPIAIJ(Mat mat)
1460 {
1461   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1462   int        ierr;
1463 
1464   PetscFunctionBegin;
1465   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1466   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 EXTERN_C_END
1470 
1471 EXTERN_C_BEGIN
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1474 int MatRetrieveValues_MPIAIJ(Mat mat)
1475 {
1476   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1477   int        ierr;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1481   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 EXTERN_C_END
1485 
1486 #include "petscpc.h"
1487 EXTERN_C_BEGIN
1488 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1489 EXTERN_C_END
1490 
1491 EXTERN_C_BEGIN
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatCreate_MPIAIJ"
1494 int MatCreate_MPIAIJ(Mat B)
1495 {
1496   Mat_MPIAIJ   *b;
1497   int          ierr,i,size;
1498 #if defined(PETSC_HAVE_SUPERLUDIST)
1499   PetscTruth   flg;
1500 #endif
1501 
1502   PetscFunctionBegin;
1503   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1504 
1505   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1506   B->data         = (void*)b;
1507   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1508   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1509   B->factor       = 0;
1510   B->assembled    = PETSC_FALSE;
1511   B->mapping      = 0;
1512 
1513   B->insertmode      = NOT_SET_VALUES;
1514   b->size            = size;
1515   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1516 
1517   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1518   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1519 
1520   /* the information in the maps duplicates the information computed below, eventually
1521      we should remove the duplicate information that is not contained in the maps */
1522   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1523   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1524 
1525   /* build local table of row and column ownerships */
1526   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1527   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1528   b->cowners = b->rowners + b->size + 2;
1529   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1530   b->rowners[0] = 0;
1531   for (i=2; i<=b->size; i++) {
1532     b->rowners[i] += b->rowners[i-1];
1533   }
1534   b->rstart = b->rowners[b->rank];
1535   b->rend   = b->rowners[b->rank+1];
1536   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1537   b->cowners[0] = 0;
1538   for (i=2; i<=b->size; i++) {
1539     b->cowners[i] += b->cowners[i-1];
1540   }
1541   b->cstart = b->cowners[b->rank];
1542   b->cend   = b->cowners[b->rank+1];
1543 
1544   /* build cache for off array entries formed */
1545   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1546   b->donotstash  = PETSC_FALSE;
1547   b->colmap      = 0;
1548   b->garray      = 0;
1549   b->roworiented = PETSC_TRUE;
1550 
1551   /* stuff used for matrix vector multiply */
1552   b->lvec      = PETSC_NULL;
1553   b->Mvctx     = PETSC_NULL;
1554 
1555   /* stuff for MatGetRow() */
1556   b->rowindices   = 0;
1557   b->rowvalues    = 0;
1558   b->getrowactive = PETSC_FALSE;
1559 
1560 /* #if defined(PETSC_HAVE_SUPERLUDIST)
1561 
1562   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1563   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1564 #endif */
1565 
1566   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1567                                      "MatStoreValues_MPIAIJ",
1568                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1569   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1570                                      "MatRetrieveValues_MPIAIJ",
1571                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1573                                      "MatGetDiagonalBlock_MPIAIJ",
1574                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1575 
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1582 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1583 {
1584   Mat        mat;
1585   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1586   int        ierr;
1587 
1588   PetscFunctionBegin;
1589   *newmat       = 0;
1590   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1591   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1592   a    = (Mat_MPIAIJ*)mat->data;
1593   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1594   mat->factor       = matin->factor;
1595   mat->assembled    = PETSC_TRUE;
1596   mat->insertmode   = NOT_SET_VALUES;
1597   mat->preallocated = PETSC_TRUE;
1598 
1599   a->rstart       = oldmat->rstart;
1600   a->rend         = oldmat->rend;
1601   a->cstart       = oldmat->cstart;
1602   a->cend         = oldmat->cend;
1603   a->size         = oldmat->size;
1604   a->rank         = oldmat->rank;
1605   a->donotstash   = oldmat->donotstash;
1606   a->roworiented  = oldmat->roworiented;
1607   a->rowindices   = 0;
1608   a->rowvalues    = 0;
1609   a->getrowactive = PETSC_FALSE;
1610 
1611   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1612   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1613   if (oldmat->colmap) {
1614 #if defined (PETSC_USE_CTABLE)
1615     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1616 #else
1617     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1618     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1619     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1620 #endif
1621   } else a->colmap = 0;
1622   if (oldmat->garray) {
1623     int len;
1624     len  = oldmat->B->n;
1625     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1626     PetscLogObjectMemory(mat,len*sizeof(int));
1627     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1628   } else a->garray = 0;
1629 
1630   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1631   PetscLogObjectParent(mat,a->lvec);
1632   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1633   PetscLogObjectParent(mat,a->Mvctx);
1634   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1635   PetscLogObjectParent(mat,a->A);
1636   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1637   PetscLogObjectParent(mat,a->B);
1638   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1639   *newmat = mat;
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #include "petscsys.h"
1644 
1645 EXTERN_C_BEGIN
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatLoad_MPIAIJ"
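/*
    Loads an MPIAIJ matrix from a PETSc binary viewer.  As read below, the file begins
    with a header of four ints (a cookie, the global row and column counts M and N, and
    a fourth field that is negative when the matrix was stored in a special on-disk
    format this routine cannot handle), followed by the row lengths, then all column
    indices, then all numerical values.  Process 0 reads each block and ships the pieces
    belonging to the other processes to them.
*/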
1648 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1649 {
1650   Mat          A;
1651   PetscScalar  *vals,*svals;
1652   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1653   MPI_Status   status;
1654   int          i,nz,ierr,j,rstart,rend,fd;
1655   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1656   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1657   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1658 
1659   PetscFunctionBegin;
1660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1661   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1662   if (!rank) {
1663     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1664     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1665     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1666     if (header[3] < 0) {
1667       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1668     }
1669   }
1670 
1671   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1672   M = header[1]; N = header[2];
1673   /* determine ownership of all rows */
1674   m = M/size + ((M % size) > rank);
1675   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1676   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1677   rowners[0] = 0;
1678   for (i=2; i<=size; i++) {
1679     rowners[i] += rowners[i-1];
1680   }
1681   rstart = rowners[rank];
1682   rend   = rowners[rank+1];
1683 
1684   /* distribute row lengths to all processors */
1685   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1686   offlens = ourlens + (rend-rstart);
1687   if (!rank) {
1688     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1689     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1690     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1691     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1692     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1693     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1694   } else {
1695     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1696   }
1697 
1698   if (!rank) {
1699     /* calculate the number of nonzeros on each processor */
1700     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1701     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1702     for (i=0; i<size; i++) {
1703       for (j=rowners[i]; j< rowners[i+1]; j++) {
1704         procsnz[i] += rowlengths[j];
1705       }
1706     }
1707     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1708 
1709     /* determine max buffer needed and allocate it */
1710     maxnz = 0;
1711     for (i=0; i<size; i++) {
1712       maxnz = PetscMax(maxnz,procsnz[i]);
1713     }
1714     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1715 
1716     /* read in my part of the matrix column indices  */
1717     nz   = procsnz[0];
1718     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1719     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1720 
1721     /* read in everyone else's part and ship it off */
1722     for (i=1; i<size; i++) {
1723       nz   = procsnz[i];
1724       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1725       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1726     }
1727     ierr = PetscFree(cols);CHKERRQ(ierr);
1728   } else {
1729     /* determine buffer space needed for message */
1730     nz = 0;
1731     for (i=0; i<m; i++) {
1732       nz += ourlens[i];
1733     }
1734     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1735 
1736     /* receive message of column indices*/
1737     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1738     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1739     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1740   }
1741 
1742   /* determine column ownership if matrix is not square */
1743   if (N != M) {
1744     n      = N/size + ((N % size) > rank);
1745     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1746     cstart = cend - n;
1747   } else {
1748     cstart = rstart;
1749     cend   = rend;
1750     n      = cend - cstart;
1751   }
1752 
1753   /* loop over local rows, determining number of off diagonal entries */
1754   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1755   jj = 0;
1756   for (i=0; i<m; i++) {
1757     for (j=0; j<ourlens[i]; j++) {
1758       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1759       jj++;
1760     }
1761   }
1762 
1763   /* create our matrix */
1764   for (i=0; i<m; i++) {
1765     ourlens[i] -= offlens[i];
1766   }
1767   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1768   A = *newmat;
1769   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1770   for (i=0; i<m; i++) {
1771     ourlens[i] += offlens[i];
1772   }
1773 
1774   if (!rank) {
1775     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1776 
1777     /* read in my part of the matrix numerical values  */
1778     nz   = procsnz[0];
1779     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1780 
1781     /* insert into matrix */
1782     jj      = rstart;
1783     smycols = mycols;
1784     svals   = vals;
1785     for (i=0; i<m; i++) {
1786       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1787       smycols += ourlens[i];
1788       svals   += ourlens[i];
1789       jj++;
1790     }
1791 
1792     /* read in other processors and ship out */
1793     for (i=1; i<size; i++) {
1794       nz   = procsnz[i];
1795       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1796       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1797     }
1798     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1799   } else {
1800     /* receive numeric values */
1801     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1802 
1803     /* receive message of values*/
1804     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1805     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1806     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1807 
1808     /* insert into matrix */
1809     jj      = rstart;
1810     smycols = mycols;
1811     svals   = vals;
1812     for (i=0; i<m; i++) {
1813       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1814       smycols += ourlens[i];
1815       svals   += ourlens[i];
1816       jj++;
1817     }
1818   }
1819   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1820   ierr = PetscFree(vals);CHKERRQ(ierr);
1821   ierr = PetscFree(mycols);CHKERRQ(ierr);
1822   ierr = PetscFree(rowners);CHKERRQ(ierr);
1823 
1824   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1825   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 EXTERN_C_END
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1832 /*
1833     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1834   copy on each process, and then the end result formed by concatenating the
1835   local matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1836 */
1837 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1838 {
1839   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1840   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1841   Mat          *local,M,Mreuse;
1842   PetscScalar  *vwork,*aa;
1843   MPI_Comm     comm = mat->comm;
1844   Mat_SeqAIJ   *aij;
1845 
1846 
1847   PetscFunctionBegin;
1848   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1849   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1850 
1851   if (call ==  MAT_REUSE_MATRIX) {
1852     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1853     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1854     local = &Mreuse;
1855     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1856   } else {
1857     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1858     Mreuse = *local;
1859     ierr   = PetscFree(local);CHKERRQ(ierr);
1860   }
1861 
1862   /*
1863       m - number of local rows
1864       n - number of columns (same on all processors)
1865       rstart - first row in new global matrix generated
1866   */
1867   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1868   if (call == MAT_INITIAL_MATRIX) {
1869     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1870     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1871     ii  = aij->i;
1872     jj  = aij->j;
1873 
1874     /*
1875         Determine the number of non-zeros in the diagonal and off-diagonal
1876         portions of the matrix in order to do correct preallocation
1877     */
1878 
1879     /* first get start and end of "diagonal" columns */
1880     if (csize == PETSC_DECIDE) {
1881       nlocal = n/size + ((n % size) > rank);
1882     } else {
1883       nlocal = csize;
1884     }
1885     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1886     rstart = rend - nlocal;
1887     if (rank == size - 1 && rend != n) {
1888       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1889     }
1890 
1891     /* next, compute all the lengths */
1892     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1893     olens = dlens + m;
1894     for (i=0; i<m; i++) {
1895       jend = ii[i+1] - ii[i];
1896       olen = 0;
1897       dlen = 0;
1898       for (j=0; j<jend; j++) {
1899         if (*jj < rstart || *jj >= rend) olen++;
1900         else dlen++;
1901         jj++;
1902       }
1903       olens[i] = olen;
1904       dlens[i] = dlen;
1905     }
1906     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1907     ierr = PetscFree(dlens);CHKERRQ(ierr);
1908   } else {
1909     int ml,nl;
1910 
1911     M = *newmat;
1912     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1913     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1914     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1915     /*
1916          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1917        rather than the slower MatSetValues().
1918     */
1919     M->was_assembled = PETSC_TRUE;
1920     M->assembled     = PETSC_FALSE;
1921   }
1922   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1923   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1924   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1925   ii  = aij->i;
1926   jj  = aij->j;
1927   aa  = aij->a;
1928   for (i=0; i<m; i++) {
1929     row   = rstart + i;
1930     nz    = ii[i+1] - ii[i];
1931     cwork = jj;     jj += nz;
1932     vwork = aa;     aa += nz;
1933     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1934   }
1935 
1936   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1938   *newmat = M;
1939 
1940   /* save submatrix used in processor for next request */
1941   if (call ==  MAT_INITIAL_MATRIX) {
1942     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1943     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1944   }
1945 
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1951 /*@C
1952    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1953    (the default parallel PETSc format).  For good matrix assembly performance
1954    the user should preallocate the matrix storage by setting the parameters
1955    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1956    performance can be increased by more than a factor of 50.
1957 
1958    Collective on MPI_Comm
1959 
1960    Input Parameters:
1961 +  A - the matrix
1962 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1963            (same value is used for all local rows)
1964 .  d_nnz - array containing the number of nonzeros in the various rows of the
1965            DIAGONAL portion of the local submatrix (possibly different for each row)
1966            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1967            The size of this array is equal to the number of local rows, i.e., 'm'.
1968            You must leave room for the diagonal entry even if it is zero.
1969 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1970            submatrix (same value is used for all local rows).
1971 -  o_nnz - array containing the number of nonzeros in the various rows of the
1972            OFF-DIAGONAL portion of the local submatrix (possibly different for
1973            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1974            structure. The size of this array is equal to the number
1975            of local rows, i.e., 'm'.
1976 
1977    The AIJ format (also called the Yale sparse matrix format or
1978    compressed row storage) is fully compatible with standard Fortran 77
1979    storage.  That is, the stored row and column indices can begin at
1980    either one (as in Fortran) or zero.  See the users manual for details.
1981 
1982    The user MUST specify either the local or global matrix dimensions
1983    (possibly both).
1984 
1985    The parallel matrix is partitioned such that the first m0 rows belong to
1986    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1987    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
1988 
1989    The DIAGONAL portion of the local submatrix of a processor can be defined
1990    as the submatrix which is obtained by extracting the part corresponding
1991    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1992    first row that belongs to the processor, and r2 is the last row belonging
1993    to this processor. This is a square mxm matrix. The remaining portion
1994    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1995 
1996    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
1997 
1998    By default, this format uses inodes (identical nodes) when possible.
1999    We search for consecutive rows with the same nonzero structure, thereby
2000    reusing matrix information to achieve increased efficiency.
2001 
2002    Options Database Keys:
2003 +  -mat_aij_no_inode  - Do not use inodes
2004 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2005 -  -mat_aij_oneindex - Internally use indexing starting at 1
2006         rather than 0.  Note that when calling MatSetValues(),
2007         the user still MUST index entries starting at 0!
2008 
2009    Example usage:
2010 
2011    Consider the following 8x8 matrix with 34 non-zero values, that is
2012    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2013    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2014    as follows:
2015 
2016 .vb
2017             1  2  0  |  0  3  0  |  0  4
2018     Proc0   0  5  6  |  7  0  0  |  8  0
2019             9  0 10  | 11  0  0  | 12  0
2020     -------------------------------------
2021            13  0 14  | 15 16 17  |  0  0
2022     Proc1   0 18  0  | 19 20 21  |  0  0
2023             0  0  0  | 22 23  0  | 24  0
2024     -------------------------------------
2025     Proc2  25 26 27  |  0  0 28  | 29  0
2026            30  0  0  | 31 32 33  |  0 34
2027 .ve
2028 
2029    This can be represented as a collection of submatrices as:
2030 
2031 .vb
2032       A B C
2033       D E F
2034       G H I
2035 .ve
2036 
2037    Where the submatrices A,B,C are owned by proc0, D,E,F are
2038    owned by proc1, G,H,I are owned by proc2.
2039 
2040    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2041    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2042    The 'M','N' parameters are 8,8, and have the same values on all procs.
2043 
2044    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2045    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2046    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2047    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2048    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2049    matrix, and [DF] as another SeqAIJ matrix.
2050 
2051    When d_nz, o_nz parameters are specified, d_nz storage elements are
2052    allocated for every row of the local diagonal submatrix, and o_nz
2053    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2054    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2055    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2056    In this case, the values of d_nz,o_nz are:
2057 .vb
2058      proc0 : d_nz = 2, o_nz = 2
2059      proc1 : d_nz = 3, o_nz = 2
2060      proc2 : d_nz = 1, o_nz = 4
2061 .ve
2062    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2063    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2064    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2065    34 values.
2066 
2067    When d_nnz, o_nnz parameters are specified, the storage is specified
2068    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2069    In the above case the values for d_nnz,o_nnz are:
2070 .vb
2071      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2072      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2073      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2074 .ve
2075    Here the space allocated is the sum of all the above values, i.e., 34, and
2076    hence pre-allocation is perfect.
2077 
2078    Level: intermediate
2079 
2080 .keywords: matrix, aij, compressed row, sparse, parallel
2081 
2082 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2083 @*/
2084 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2085 {
2086   Mat_MPIAIJ   *b;
2087   int          ierr,i;
2088   PetscTruth   flg2;
2089 
2090   PetscFunctionBegin;
2091   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2092   if (!flg2) PetscFunctionReturn(0);
2093   B->preallocated = PETSC_TRUE;
2094   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2095   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2096   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2097   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2098   if (d_nnz) {
2099     for (i=0; i<B->m; i++) {
2100       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2101     }
2102   }
2103   if (o_nnz) {
2104     for (i=0; i<B->m; i++) {
2105       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2106     }
2107   }
2108   b = (Mat_MPIAIJ*)B->data;
2109 
2110   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2111   PetscLogObjectParent(B,b->A);
2112   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2113   PetscLogObjectParent(B,b->B);
2114 
2115   PetscFunctionReturn(0);
2116 }
2117 
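/*
   A minimal usage sketch (not part of the library source), assuming a matrix B that has
   already been created with MatCreate() and MatSetType(B,MATMPIAIJ) and that owns the
   three proc1 rows of the 8x8 example above; the d_nnz/o_nnz arrays are the per-row
   counts listed in the manual page text:

       int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1},ierr;
       ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   The single-value form reserves the same space in every local row instead:

       ierr = MatMPIAIJSetPreallocation(B,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/
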
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCreateMPIAIJ"
2120 /*@C
2121    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2122    (the default parallel PETSc format).  For good matrix assembly performance
2123    the user should preallocate the matrix storage by setting the parameters
2124    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2125    performance can be increased by more than a factor of 50.
2126 
2127    Collective on MPI_Comm
2128 
2129    Input Parameters:
2130 +  comm - MPI communicator
2131 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2132            This value should be the same as the local size used in creating the
2133            y vector for the matrix-vector product y = Ax.
2134 .  n - This value should be the same as the local size used in creating the
2135        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2136        calculated if N is given) For square matrices n is almost always m.
2137 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2138 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2139 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2140            (same value is used for all local rows)
2141 .  d_nnz - array containing the number of nonzeros in the various rows of the
2142            DIAGONAL portion of the local submatrix (possibly different for each row)
2143            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2144            The size of this array is equal to the number of local rows, i.e., 'm'.
2145            You must leave room for the diagonal entry even if it is zero.
2146 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2147            submatrix (same value is used for all local rows).
2148 -  o_nnz - array containing the number of nonzeros in the various rows of the
2149            OFF-DIAGONAL portion of the local submatrix (possibly different for
2150            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2151            structure. The size of this array is equal to the number
2152            of local rows, i.e., 'm'.
2153 
2154    Output Parameter:
2155 .  A - the matrix
2156 
2157    Notes:
2158    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2159    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2160    storage requirements for this matrix.
2161 
2162    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2163    processor then it must be used on all processors that share the object for
2164    that argument.
2165 
2166    The AIJ format (also called the Yale sparse matrix format or
2167    compressed row storage) is fully compatible with standard Fortran 77
2168    storage.  That is, the stored row and column indices can begin at
2169    either one (as in Fortran) or zero.  See the users manual for details.
2170 
2171    The user MUST specify either the local or global matrix dimensions
2172    (possibly both).
2173 
2174    The parallel matrix is partitioned such that the first m0 rows belong to
2175    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2176    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2177 
2178    The DIAGONAL portion of the local submatrix of a processor can be defined
2179    as the submatrix which is obtained by extracting the part corresponding
2180    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2181    first row that belongs to the processor, and r2 is the last row belonging
2182    to this processor. This is a square mxm matrix. The remaining portion
2183    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2184 
2185    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2186 
2187    By default, this format uses inodes (identical nodes) when possible.
2188    We search for consecutive rows with the same nonzero structure, thereby
2189    reusing matrix information to achieve increased efficiency.
2190 
2191    Options Database Keys:
2192 +  -mat_aij_no_inode  - Do not use inodes
2193 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2194 -  -mat_aij_oneindex - Internally use indexing starting at 1
2195         rather than 0.  Note that when calling MatSetValues(),
2196         the user still MUST index entries starting at 0!
2197 
2198 
2199    Example usage:
2200 
2201    Consider the following 8x8 matrix with 34 non-zero values, that is
2202    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2203    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2204    as follows:
2205 
2206 .vb
2207             1  2  0  |  0  3  0  |  0  4
2208     Proc0   0  5  6  |  7  0  0  |  8  0
2209             9  0 10  | 11  0  0  | 12  0
2210     -------------------------------------
2211            13  0 14  | 15 16 17  |  0  0
2212     Proc1   0 18  0  | 19 20 21  |  0  0
2213             0  0  0  | 22 23  0  | 24  0
2214     -------------------------------------
2215     Proc2  25 26 27  |  0  0 28  | 29  0
2216            30  0  0  | 31 32 33  |  0 34
2217 .ve
2218 
2219    This can be represented as a collection of submatrices as:
2220 
2221 .vb
2222       A B C
2223       D E F
2224       G H I
2225 .ve
2226 
2227    Where the submatrices A,B,C are owned by proc0, D,E,F are
2228    owned by proc1, G,H,I are owned by proc2.
2229 
2230    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2231    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2232    The 'M','N' parameters are 8,8, and have the same values on all procs.
2233 
2234    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2235    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2236    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2237    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2238    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2239    matrix, and [DF] as another SeqAIJ matrix.
2240 
2241    When d_nz, o_nz parameters are specified, d_nz storage elements are
2242    allocated for every row of the local diagonal submatrix, and o_nz
2243    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2244    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2245    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2246    In this case, the values of d_nz,o_nz are:
2247 .vb
2248      proc0 : d_nz = 2, o_nz = 2
2249      proc1 : d_nz = 3, o_nz = 2
2250      proc2 : d_nz = 1, o_nz = 4
2251 .ve
2252    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2253    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2254    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2255    34 values.
2256 
2257    When d_nnz, o_nnz parameters are specified, the storage is specified
2258    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2259    In the above case the values for d_nnz,o_nnz are:
2260 .vb
2261      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2262      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2263      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2264 .ve
2265    Here the space allocated is the sum of all the above values, i.e., 34, and
2266    hence pre-allocation is perfect.
2267 
2268    Level: intermediate
2269 
2270 .keywords: matrix, aij, compressed row, sparse, parallel
2271 
2272 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2273 @*/
2274 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2275 {
2276   int ierr,size;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2280   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2281   if (size > 1) {
2282     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2283     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2284   } else {
2285     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2286     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
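/*
   A minimal usage sketch (not part of the library source): creating the 8x8 example
   matrix above on 3 processes.  Each process passes its own local row count (3, 3, or 2)
   as m, lets PETSc choose the column layout, and uses the single-value preallocation
   guesses from the text; the assembly calls that would follow are only indicated:

       Mat A;
       int ierr,m = 3,d_nz = 3,o_nz = 2;     values shown for proc1; other processes pass their own
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,PETSC_DECIDE,8,8,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
       ... MatSetValues() ... MatAssemblyBegin()/MatAssemblyEnd() ...
*/
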
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
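/*
    Gives access to the constituent pieces of an MPIAIJ matrix: the "diagonal" SeqAIJ
    block Ad, the "off-diagonal" SeqAIJ block Ao (stored with compressed columns), and
    colmap (the garray), which maps each compressed column number of Ao to its global
    column number.
*/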
2293 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2294 {
2295   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2296   PetscFunctionBegin;
2297   *Ad     = a->A;
2298   *Ao     = a->B;
2299   *colmap = a->garray;
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatSetColoring_MPIAIJ"
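/*
    Applies a coloring to the two sequential pieces of the parallel matrix: the diagonal
    block a->A and the off-diagonal block a->B each receive a sequential coloring derived
    from the supplied one, with the off-diagonal colors looked up through the garray and
    local-to-global mapping.
*/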
2305 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2306 {
2307   int        ierr;
2308   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2309 
2310   PetscFunctionBegin;
2311   if (coloring->ctype == IS_COLORING_LOCAL) {
2312     int        *allcolors,*colors,i;
2313     ISColoring ocoloring;
2314 
2315     /* set coloring for diagonal portion */
2316     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2317 
2318     /* set coloring for off-diagonal portion */
2319     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2320     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2321     for (i=0; i<a->B->n; i++) {
2322       colors[i] = allcolors[a->garray[i]];
2323     }
2324     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2325     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2326     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2327     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2328   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2329     int        *colors,i,*larray;
2330     ISColoring ocoloring;
2331 
2332     /* set coloring for diagonal portion */
2333     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2334     for (i=0; i<a->A->n; i++) {
2335       larray[i] = i + a->cstart;
2336     }
2337     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2338     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2339     for (i=0; i<a->A->n; i++) {
2340       colors[i] = coloring->colors[larray[i]];
2341     }
2342     ierr = PetscFree(larray);CHKERRQ(ierr);
2343     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2344     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2345     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2346 
2347     /* set coloring for off-diagonal portion */
2348     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2349     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2350     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2351     for (i=0; i<a->B->n; i++) {
2352       colors[i] = coloring->colors[larray[i]];
2353     }
2354     ierr = PetscFree(larray);CHKERRQ(ierr);
2355     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2356     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2357     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2358   } else {
2359     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2360   }
2361 
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2367 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2368 {
2369   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2370   int        ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2374   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2380 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2381 {
2382   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2383   int        ierr;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2387   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390