/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);
EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
EXTERN int MatUseSpooles_MPIAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each processor holds an order-N integer array), although access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
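
/*
   A minimal usage sketch (illustrative, not library code) of how the colmap
   built above is consulted elsewhere in this file.  Keys and values are
   stored shifted by one so that a result of zero can mean "not found";
   gcol/lcol are made-up names and error handling is elided.

     int gcol = ...;   // global column number of an off-diagonal entry
     int lcol;         // local column number in aij->B, or -1 if absent
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/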

#define CHUNKSIZE   15
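
/*
   The two macros below are hand-inlined variants of MatSetValues_SeqAIJ()
   for the diagonal (A) and off-diagonal (B) blocks.  Each locates the column
   within the row by a binary search that narrows the window to at most five
   entries followed by a linear scan, then adds or inserts the value; if the
   row has no free slot left, the whole block is reallocated with CHUNKSIZE
   extra entries for that row.
*/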
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
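
/*
   Usage sketch (illustrative only, names made up): one locally owned entry
   and one entry destined for another process.  Off-processor entries are
   stashed above and only communicated during MatAssemblyBegin/End().

     int         row  = rstart;  // a row this process owns
     int         orow = rend;    // first row of the next process, if any
     PetscScalar one  = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&row,&one,ADD_VALUES);CHKERRQ(ierr);
     if (orow < mat->M) {
       ierr = MatSetValues(mat,1,&orow,1,&row,&one,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/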

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in the same INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
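
/*
   Note on the reduction above: mat->was_assembled is false (0) on any process
   that has disassembled (or never assembled) its matrix, so the MPI_PROD
   reduction yields 0 exactly when at least one process disassembled; every
   other process then disassembles as well so that all processes can
   reassemble consistently.
*/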

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory; that would cause lots of mallocs and slow
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
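
/*
   Usage sketch (illustrative only): zeroing two global rows and placing 1.0
   on the diagonal.  The rows listed may be owned by any process; the routine
   above communicates each row to its owner.

     IS          is;
     int         rows[2] = {0,5};
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/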

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
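
/*
   The multiply above overlaps communication with computation.  With A_d the
   local diagonal block, A_o the off-diagonal block, and x_g the ghost values
   of x gathered into a->lvec, it computes

      y = A_d * x + A_o * x_g

   starting the scatter for x_g, applying A_d while the messages are in
   flight, and adding the A_o contribution once the scatter completes.
*/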

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     added into yy until VecScatterEnd() below, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     added into zz until VecScatterEnd() below, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
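
/*
   Each outer iteration above forms a fresh local right-hand side

      bb1 = bb - A_o * x_g

   (implemented by negating the ghost values x_g in mat->lvec and calling the
   off-diagonal block's multadd) and then runs the requested sweep on the
   local diagonal block only; this is why only the SOR_LOCAL_* variants are
   supported.
*/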

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
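
/*
   Usage sketch (illustrative only): visiting the locally owned rows.  Rows
   outside [rstart,rend) cannot be requested, and each MatGetRow() must be
   matched by MatRestoreRow() before the next row is fetched.

     int         rstart,rend,row,nz,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... examine the nz entries cols[] and vals[] of this row ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/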

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we could provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
{
  int        ierr,one = 1; /* unit stride for the BLAS axpy calls below */
  Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x  = (Mat_SeqAIJ *)xx->A->data;
    y  = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
    x  = (Mat_SeqAIJ *)xx->B->data;
    y  = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
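
/*
   Usage sketch for MatAXPY() as implemented above (illustrative only, not
   compiled; the helper name AXPYExample is hypothetical, and the scalar-by-
   address calling convention is assumed to match this era of the interface).
   The operation computed is Y <- a*X + Y.
*/
#if 0
static int AXPYExample(Mat X,Mat Y)
{
  PetscScalar a = 2.0;
  int         ierr;

  /* fast path: both matrices share the same nonzero pattern */
  ierr = MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  return 0;
}
#endif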

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
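
/*
   Usage sketch for the store/retrieve pair above (illustrative only, not
   compiled; the helper name StoreRetrieveExample is hypothetical).  The
   intended pattern is: freeze the nonzero structure, save the numerical
   values, recompute, and restore the saved values later.
*/
#if 0
static int StoreRetrieveExample(Mat A)
{
  int ierr;

  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);     /* saves the values of both a->A and a->B */
  /* ... overwrite the matrix values here ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);  /* restores the saved values */
  return 0;
}
#endif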

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;
#if defined(PETSC_HAVE_SUPERLUDIST)
  PetscTruth flg;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;
  /*
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
  if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
#endif
  */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
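
/*
   Usage sketch for MatDuplicate() (illustrative only, not compiled; the
   helper name DuplicateExample is hypothetical).  The option controls
   whether the numerical values are copied along with the parallel layout
   and nonzero structure duplicated above.
*/
#if 0
static int DuplicateExample(Mat A)
{
  Mat B;
  int ierr;

  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);        /* structure + values */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr); /* structure only */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  return 0;
}
#endif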

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
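
/*
   Usage sketch for loading a matrix in this format (illustrative only, not
   compiled; the file name is hypothetical and the PETSC_BINARY_RDONLY flag
   spelling is an assumption about this era of the viewer interface).
*/
#if 0
static int LoadExample(MPI_Comm comm,Mat *A)
{
  PetscViewer viewer;
  int         ierr;

  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr); /* dispatches to MatLoad_MPIAIJ() */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  return 0;
}
#endif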

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each process, and then the end result by concatenating the
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
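
/*
   Usage sketch for MatGetSubMatrix() (illustrative only, not compiled; the
   helper name and the index ranges are hypothetical).  Each process supplies
   the rows and columns it wants in global numbering; csize = PETSC_DECIDE
   lets the routine pick the local column layout of the result.
*/
#if 0
static int SubMatrixExample(Mat A)
{
  IS  isrow,iscol;
  Mat S;
  int ierr,rstart,rend;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  /* take the locally owned rows and (say) the first 10 global columns */
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,10,0,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  /* on later calls with the same index sets, pass MAT_REUSE_MATRIX and S */
  ierr = MatDestroy(S);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  return 0;
}
#endif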

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix
   in AIJ format (the default parallel PETSc format).  For good matrix assembly
   performance the user should preallocate the matrix storage by setting the
   parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
   accurately, performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of each of the local DIAGONAL and OFF-DIAGONAL
   submatrices. In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ   *b;
  int          ierr,i;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
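
/*
   Usage sketch for the per-row preallocation described in the manual page
   above (illustrative only, not compiled; the helper name is hypothetical).
   This mimics proc1 of the 8x8 example: 3 local rows whose diagonal-block
   rows have 3,3,2 nonzeros and whose off-diagonal rows have 2,1,1.
*/
#if 0
static int PreallocationExample(Mat B)
{
  int d_nnz[3] = {3,3,2};
  int o_nnz[3] = {2,1,1};
  int ierr;

  /* B must have 3 local rows here; d_nz/o_nz are ignored when the arrays are given */
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  return 0;
}
#endif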

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning
   across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
   approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of each of the local DIAGONAL and OFF-DIAGONAL
   submatrices. In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
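
/*
   Usage sketch for MatCreateMPIAIJ() (illustrative only, not compiled; the
   helper name and the sizes are hypothetical).  Each process passes its local
   row/column counts and lets PETSc sum the global sizes; entries are then set
   by their owning process and the matrix is assembled.
*/
#if 0
static int CreateExample(MPI_Comm comm,Mat *A)
{
  int         ierr,rstart,rend,i,col;
  PetscScalar val = 2.0;

  /* 3 local rows and columns per process; 5 nonzeros per row preallocated
     in the diagonal block, 2 in the off-diagonal block */
  ierr = MatCreateMPIAIJ(comm,3,3,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    col  = i;   /* set the diagonal entry of each locally owned row */
    ierr = MatSetValues(*A,1,&i,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif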

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
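
/*
   Usage sketch for MatMPIAIJGetSeqAIJ() (illustrative only, not compiled;
   the helper name is hypothetical).  Ad holds the square diagonal block,
   Ao the column-compressed off-diagonal block, and colmap (the garray)
   translates Ao's local column indices back to global column numbers.
*/
#if 0
static int GetSeqAIJExample(Mat A)
{
  Mat Ad,Ao;
  int ierr,*colmap;

  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  /* the global column of local column j of Ao is colmap[j] */
  return 0;
}
#endif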

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}