1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
18 a slightly higher hash-table lookup cost; without it, it is not scalable (each
19 processor holds an order-N integer array) but lookups are a fast direct access.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
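
/*
   A minimal sketch (gcol and lcol are illustrative names, not from this file) of how
   the colmap built above is consumed when a global column number gcol must be
   translated to a local column of the off-diagonal block B, mirroring the lookups
   done in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below:

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif
       if (lcol < 0) ... gcol is not (yet) a column of the off-diagonal block ...

   Both variants store indices shifted by one so that zero can mean "not present".
*/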
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
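
/*
   Notes on the two macros above: each locates the slot for (row,col) with a short
   binary search followed by a linear scan, then either updates an existing entry
   (adding or overwriting according to addv) or shifts the tail of the row up one
   slot to make room for the new nonzero.  The nonew flag controls new locations:
   1 drops them silently, -1 raises an error for any new nonzero, and -2 raises an
   error only when extra storage would have to be allocated.  When a row is full
   (nrow >= rmax) the entire (i,j,a) storage is reallocated with

       new_nz = ai[am] + CHUNKSIZE

   extra slots spliced in after the current row, so inserting into a poorly
   preallocated matrix costs one malloc and copy per CHUNKSIZE new entries per row.
*/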
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
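
/*
   A hedged caller-side sketch (A, row, col are illustrative): values are set with
   global indices on any process; entries whose rows fall outside the local ownership
   range are placed in the stash above and communicated during assembly.

       int          row,col,ierr;
       PetscScalar  one = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   All processes must call the two assembly routines, and INSERT_VALUES and
   ADD_VALUES may not be mixed across processes; the MPI_Allreduce in
   MatAssemblyBegin_MPIAIJ() below checks for exactly that.
*/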
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode, not a mix */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for (i=0; i<n;) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled; if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled thus we can skip this stuff
385   */
386   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433   PetscTruth     found;
434 
435   PetscFunctionBegin;
436   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
442   procs  = nprocs + size;
443   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
444   for (i=0; i<N; i++) {
445     idx = rows[i];
446     found = PETSC_FALSE;
447     for (j=0; j<size; j++) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450       }
451     }
452     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453   }
454   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
458   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
459   nrecvs = work[size+rank];
460   nmax   = work[rank];
461   ierr   = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use separate
532      code so that MatSetValues() is not called for each diagonal entry, which
533      would allocate new memory, causing many mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
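
/*
   A minimal caller-side sketch (illustrative values) of driving the routine above:
   each process may list any global rows, owned or not, since the code above routes
   every row index to its owner; diag, if non-null, is placed on the diagonal of the
   zeroed rows.

       IS          is;
       int         rows[2] = {0,5},ierr;
       PetscScalar diag = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/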
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != A->n) {
581     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
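
/*
   For reference, the multiply above computes, for the locally owned rows,

       y_local = A_diag * x_local + B_off * x_ghost

   where A_diag (a->A) holds the columns this process owns, B_off (a->B) holds the
   compressed off-process columns, and x_ghost (a->lvec) is filled by the VecScatter
   a->Mvctx.  The scatter is started before the diagonal-block multiply and completed
   just before the off-diagonal multiply so communication overlaps computation.
*/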
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* added into yy until the VecScatterEnd() below, which is true for this */
621   /* implementation but may not always be true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* added into zz until the VecScatterEnd() below, which is true for this */
642   /* implementation but may not always be true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
711 {
712   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
713   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
714   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
715   PetscTruth        isdraw,isascii,flg;
716   PetscViewer       sviewer;
717   PetscViewerFormat format;
718 
719   PetscFunctionBegin;
720   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   if (isascii) {
723     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
724     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
725       MatInfo info;
726       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
727       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
729       if (flg) {
730         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
731 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
732       } else {
733         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
734 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
735       }
736       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
737       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
738       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
739       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
740       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
741       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
742       PetscFunctionReturn(0);
743     } else if (format == PETSC_VIEWER_ASCII_INFO) {
744       PetscFunctionReturn(0);
745     }
746   } else if (isdraw) {
747     PetscDraw       draw;
748     PetscTruth isnull;
749     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
750     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
751   }
752 
753   if (size == 1) {
754     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
755     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
756   } else {
757     /* assemble the entire matrix onto first processor. */
758     Mat         A;
759     Mat_SeqAIJ *Aloc;
760     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
761     PetscScalar *a;
762 
763     if (!rank) {
764       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
765     } else {
766       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
767     }
768     PetscLogObjectParent(mat,A);
769 
770     /* copy over the A part */
771     Aloc = (Mat_SeqAIJ*)aij->A->data;
772     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
773     row = aij->rstart;
774     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
775     for (i=0; i<m; i++) {
776       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
777       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
778     }
779     aj = Aloc->j;
780     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
781 
782     /* copy over the B part */
783     Aloc = (Mat_SeqAIJ*)aij->B->data;
784     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
785     row  = aij->rstart;
786     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
787     ct   = cols;
788     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
789     for (i=0; i<m; i++) {
790       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
791       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
792     }
793     ierr = PetscFree(ct);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Every process has to make the call in order to draw the matrix since the
798        graphics waits are synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPIAIJ"
813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
823   if (isascii || isdraw || isbinary || issocket) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 /*
832     This has to provide several versions.
833 
834      2) a) use only local smoothing, updating outer values only once.
835         b) local smoothing, updating outer values each inner iteration.
836      3) colored smoothing, updating outer values between colors.
837 */
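
/*
   For reference, the local sweeps below apply, for each owned row i,

       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j != i} a_ij*x_j)/(a_ii + fshift)

   where the sum runs over both the diagonal block A (using x itself) and the
   off-diagonal block B (using the ghost values gathered into mat->lvec once per
   call); this assumes the SPARSEDENSEMDOT macro from src/inline/spops.h subtracts
   the sparse row dot product from sum.  Only local ghost values are used, so these
   are processor-local sweeps, not a true parallel SOR.
*/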
838 #undef __FUNCT__
839 #define __FUNCT__ "MatRelax_MPIAIJ"
840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
841 {
842   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
843   Mat          AA = mat->A,BB = mat->B;
844   Mat_SeqAIJ   *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
845   PetscScalar  *b,*x,*xs,*ls,d,*v,sum;
846   int          ierr,*idx,*diag;
847   int          n = matin->n,m = matin->m,i,shift = A->indexshift;
848 
849   PetscFunctionBegin;
850   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
851   diag = A->diag;
852   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
853     if (flag & SOR_ZERO_INITIAL_GUESS) {
854       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
855       PetscFunctionReturn(0);
856     }
857     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
858     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
859     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
860     if (xx != bb) {
861       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
862     } else {
863       b = x;
864     }
865     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
866     xs = x + shift; /* shift by one for index start of 1 */
867     ls = ls + shift;
868     while (its--) {
869       /* go down through the rows */
870       for (i=0; i<m; i++) {
871         n    = A->i[i+1] - A->i[i];
872 	PetscLogFlops(4*n+3);
873         idx  = A->j + A->i[i] + shift;
874         v    = A->a + A->i[i] + shift;
875         sum  = b[i];
876         SPARSEDENSEMDOT(sum,xs,v,idx,n);
877         d    = fshift + A->a[diag[i]+shift];
878         n    = B->i[i+1] - B->i[i];
879         idx  = B->j + B->i[i] + shift;
880         v    = B->a + B->i[i] + shift;
881         SPARSEDENSEMDOT(sum,ls,v,idx,n);
882         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
883       }
884       /* come up through the rows */
885       for (i=m-1; i>-1; i--) {
886         n    = A->i[i+1] - A->i[i];
887 	PetscLogFlops(4*n+3);
888         idx  = A->j + A->i[i] + shift;
889         v    = A->a + A->i[i] + shift;
890         sum  = b[i];
891         SPARSEDENSEMDOT(sum,xs,v,idx,n);
892         d    = fshift + A->a[diag[i]+shift];
893         n    = B->i[i+1] - B->i[i];
894         idx  = B->j + B->i[i] + shift;
895         v    = B->a + B->i[i] + shift;
896         SPARSEDENSEMDOT(sum,ls,v,idx,n);
897         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
898       }
899     }
900     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
902     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
903   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
904     if (flag & SOR_ZERO_INITIAL_GUESS) {
905       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
906       PetscFunctionReturn(0);
907     }
908     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
909     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
910     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
911     if (xx != bb) {
912       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
913     } else {
914       b = x;
915     }
916     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
917     xs = x + shift; /* shift by one for index start of 1 */
918     ls = ls + shift;
919     while (its--) {
920       for (i=0; i<m; i++) {
921         n    = A->i[i+1] - A->i[i];
922 	PetscLogFlops(4*n+3);
923         idx  = A->j + A->i[i] + shift;
924         v    = A->a + A->i[i] + shift;
925         sum  = b[i];
926         SPARSEDENSEMDOT(sum,xs,v,idx,n);
927         d    = fshift + A->a[diag[i]+shift];
928         n    = B->i[i+1] - B->i[i];
929         idx  = B->j + B->i[i] + shift;
930         v    = B->a + B->i[i] + shift;
931         SPARSEDENSEMDOT(sum,ls,v,idx,n);
932         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
933       }
934     }
935     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
936     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
937     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
938   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
939     if (flag & SOR_ZERO_INITIAL_GUESS) {
940       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
941       PetscFunctionReturn(0);
942     }
943     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
944     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
945     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
946     if (xx != bb) {
947       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
948     } else {
949       b = x;
950     }
951     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
952     xs = x + shift; /* shift by one for index start of 1 */
953     ls = ls + shift;
954     while (its--) {
955       for (i=m-1; i>-1; i--) {
956         n    = A->i[i+1] - A->i[i];
957 	PetscLogFlops(4*n+3);
958         idx  = A->j + A->i[i] + shift;
959         v    = A->a + A->i[i] + shift;
960         sum  = b[i];
961         SPARSEDENSEMDOT(sum,xs,v,idx,n);
962         d    = fshift + A->a[diag[i]+shift];
963         n    = B->i[i+1] - B->i[i];
964         idx  = B->j + B->i[i] + shift;
965         v    = B->a + B->i[i] + shift;
966         SPARSEDENSEMDOT(sum,ls,v,idx,n);
967         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
968       }
969     }
970     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
972     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
973   } else {
974     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatGetInfo_MPIAIJ"
981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
982 {
983   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
984   Mat        A = mat->A,B = mat->B;
985   int        ierr;
986   PetscReal  isend[5],irecv[5];
987 
988   PetscFunctionBegin;
989   info->block_size     = 1.0;
990   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
991   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
992   isend[3] = info->memory;  isend[4] = info->mallocs;
993   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
994   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
995   isend[3] += info->memory;  isend[4] += info->mallocs;
996   if (flag == MAT_LOCAL) {
997     info->nz_used      = isend[0];
998     info->nz_allocated = isend[1];
999     info->nz_unneeded  = isend[2];
1000     info->memory       = isend[3];
1001     info->mallocs      = isend[4];
1002   } else if (flag == MAT_GLOBAL_MAX) {
1003     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1004     info->nz_used      = irecv[0];
1005     info->nz_allocated = irecv[1];
1006     info->nz_unneeded  = irecv[2];
1007     info->memory       = irecv[3];
1008     info->mallocs      = irecv[4];
1009   } else if (flag == MAT_GLOBAL_SUM) {
1010     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1011     info->nz_used      = irecv[0];
1012     info->nz_allocated = irecv[1];
1013     info->nz_unneeded  = irecv[2];
1014     info->memory       = irecv[3];
1015     info->mallocs      = irecv[4];
1016   }
1017   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1018   info->fill_ratio_needed = 0;
1019   info->factor_mallocs    = 0;
1020   info->rows_global       = (double)matin->M;
1021   info->columns_global    = (double)matin->N;
1022   info->rows_local        = (double)matin->m;
1023   info->columns_local     = (double)matin->N;
1024 
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatSetOption_MPIAIJ"
1030 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1033   int        ierr;
1034 
1035   PetscFunctionBegin;
1036   switch (op) {
1037   case MAT_NO_NEW_NONZERO_LOCATIONS:
1038   case MAT_YES_NEW_NONZERO_LOCATIONS:
1039   case MAT_COLUMNS_UNSORTED:
1040   case MAT_COLUMNS_SORTED:
1041   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1042   case MAT_KEEP_ZEROED_ROWS:
1043   case MAT_NEW_NONZERO_LOCATION_ERR:
1044   case MAT_USE_INODES:
1045   case MAT_DO_NOT_USE_INODES:
1046   case MAT_IGNORE_ZERO_ENTRIES:
1047     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1048     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1049     break;
1050   case MAT_ROW_ORIENTED:
1051     a->roworiented = PETSC_TRUE;
1052     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1053     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1054     break;
1055   case MAT_ROWS_SORTED:
1056   case MAT_ROWS_UNSORTED:
1057   case MAT_YES_NEW_DIAGONALS:
1058   case MAT_USE_SINGLE_PRECISION_SOLVES:
1059     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1060     break;
1061   case MAT_COLUMN_ORIENTED:
1062     a->roworiented = PETSC_FALSE;
1063     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1064     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1065     break;
1066   case MAT_IGNORE_OFF_PROC_ENTRIES:
1067     a->donotstash = PETSC_TRUE;
1068     break;
1069   case MAT_NO_NEW_DIAGONALS:
1070     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1071     break;
1072   default:
1073     SETERRQ(PETSC_ERR_SUP,"unknown option");
1074     break;
1075   }
1076   PetscFunctionReturn(0);
1077 }
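
/*
   Illustrative use: most options are simply forwarded to the two sequential blocks;
   MAT_IGNORE_OFF_PROC_ENTRIES only flips the local donotstash flag, so values
   destined for rows owned by other processes are dropped instead of stashed.

       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/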
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetOwnershipRange_MPIAIJ"
1081 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1084 
1085   PetscFunctionBegin;
1086   if (m) *m = mat->rstart;
1087   if (n) *n = mat->rend;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRow_MPIAIJ"
1093 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1094 {
1095   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1096   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1097   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1098   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1099   int          *cmap,*idx_p;
1100 
1101   PetscFunctionBegin;
1102   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1103   mat->getrowactive = PETSC_TRUE;
1104 
1105   if (!mat->rowvalues && (idx || v)) {
1106     /*
1107         allocate enough space to hold information from the longest row.
1108     */
1109     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1110     int     max = 1,tmp;
1111     for (i=0; i<matin->m; i++) {
1112       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1113       if (max < tmp) { max = tmp; }
1114     }
1115     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1116     mat->rowindices = (int*)(mat->rowvalues + max);
1117   }
1118 
1119   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1120   lrow = row - rstart;
1121 
1122   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1123   if (!v)   {pvA = 0; pvB = 0;}
1124   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1125   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1126   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1127   nztot = nzA + nzB;
1128 
1129   cmap  = mat->garray;
1130   if (v  || idx) {
1131     if (nztot) {
1132       /* Sort by increasing column numbers, assuming A and B already sorted */
1133       int imark = -1;
1134       if (v) {
1135         *v = v_p = mat->rowvalues;
1136         for (i=0; i<nzB; i++) {
1137           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1138           else break;
1139         }
1140         imark = i;
1141         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1142         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1143       }
1144       if (idx) {
1145         *idx = idx_p = mat->rowindices;
1146         if (imark > -1) {
1147           for (i=0; i<imark; i++) {
1148             idx_p[i] = cmap[cworkB[i]];
1149           }
1150         } else {
1151           for (i=0; i<nzB; i++) {
1152             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1153             else break;
1154           }
1155           imark = i;
1156         }
1157         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1158         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1159       }
1160     } else {
1161       if (idx) *idx = 0;
1162       if (v)   *v   = 0;
1163     }
1164   }
1165   *nz = nztot;
1166   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1167   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
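
/*
   A hedged usage sketch of the row access implemented above (A is illustrative):
   only locally owned rows may be requested, and each MatGetRow() must be matched
   by a MatRestoreRow() before the next row is fetched.

       int          row,rstart,rend,nz,*cols,ierr;
       PetscScalar  *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use the nz global column indices cols[] and values vals[] ...
         ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/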
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1173 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1174 {
1175   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1176 
1177   PetscFunctionBegin;
1178   if (aij->getrowactive == PETSC_FALSE) {
1179     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1180   }
1181   aij->getrowactive = PETSC_FALSE;
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatNorm_MPIAIJ"
1187 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1188 {
1189   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1191   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1192   PetscReal    sum = 0.0;
1193   PetscScalar  *v;
1194 
1195   PetscFunctionBegin;
1196   if (aij->size == 1) {
1197     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1198   } else {
1199     if (type == NORM_FROBENIUS) {
1200       v = amat->a;
1201       for (i=0; i<amat->nz; i++) {
1202 #if defined(PETSC_USE_COMPLEX)
1203         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1204 #else
1205         sum += (*v)*(*v); v++;
1206 #endif
1207       }
1208       v = bmat->a;
1209       for (i=0; i<bmat->nz; i++) {
1210 #if defined(PETSC_USE_COMPLEX)
1211         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1212 #else
1213         sum += (*v)*(*v); v++;
1214 #endif
1215       }
1216       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1217       *norm = sqrt(*norm);
1218     } else if (type == NORM_1) { /* max column norm */
1219       PetscReal *tmp,*tmp2;
1220       int    *jj,*garray = aij->garray;
1221       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1222       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1223       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1224       *norm = 0.0;
1225       v = amat->a; jj = amat->j;
1226       for (j=0; j<amat->nz; j++) {
1227         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1228       }
1229       v = bmat->a; jj = bmat->j;
1230       for (j=0; j<bmat->nz; j++) {
1231         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1232       }
1233       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1234       for (j=0; j<mat->N; j++) {
1235         if (tmp2[j] > *norm) *norm = tmp2[j];
1236       }
1237       ierr = PetscFree(tmp);CHKERRQ(ierr);
1238       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1239     } else if (type == NORM_INFINITY) { /* max row norm */
1240       PetscReal ntemp = 0.0;
1241       for (j=0; j<aij->A->m; j++) {
1242         v = amat->a + amat->i[j] + shift;
1243         sum = 0.0;
1244         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1245           sum += PetscAbsScalar(*v); v++;
1246         }
1247         v = bmat->a + bmat->i[j] + shift;
1248         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1249           sum += PetscAbsScalar(*v); v++;
1250         }
1251         if (sum > ntemp) ntemp = sum;
1252       }
1253       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1254     } else {
1255       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1256     }
1257   }
1258   PetscFunctionReturn(0);
1259 }
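
/*
   For reference, the norms computed above are

       NORM_FROBENIUS:  sqrt(sum_ij |a_ij|^2)                 (MPI_SUM reduction)
       NORM_1:          max_j sum_i |a_ij|    max column sum; uses a length-N work
                                              array summed across processes
       NORM_INFINITY:   max_i sum_j |a_ij|    max row sum      (MPI_MAX reduction)

   The two norm (NORM_2) is not supported in parallel.
*/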
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatTranspose_MPIAIJ"
1263 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1264 {
1265   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1266   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1267   int          ierr,shift = Aloc->indexshift;
1268   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1269   Mat          B;
1270   PetscScalar  *array;
1271 
1272   PetscFunctionBegin;
1273   if (!matout && M != N) {
1274     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1275   }
1276 
1277   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1278 
1279   /* copy over the A part */
1280   Aloc = (Mat_SeqAIJ*)a->A->data;
1281   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1282   row = a->rstart;
1283   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1284   for (i=0; i<m; i++) {
1285     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1286     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1287   }
1288   aj = Aloc->j;
1289   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1290 
1291   /* copy over the B part */
1292   Aloc = (Mat_SeqAIJ*)a->B->data;
1293   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1294   row  = a->rstart;
1295   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1296   ct   = cols;
1297   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1298   for (i=0; i<m; i++) {
1299     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1300     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1301   }
1302   ierr = PetscFree(ct);CHKERRQ(ierr);
1303   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1304   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1305   if (matout) {
1306     *matout = B;
1307   } else {
1308     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1315 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1316 {
1317   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1318   Mat        a = aij->A,b = aij->B;
1319   int        ierr,s1,s2,s3;
1320 
1321   PetscFunctionBegin;
1322   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1323   if (rr) {
1324     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1325     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1326     /* Overlap communication with computation. */
1327     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1328   }
1329   if (ll) {
1330     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1331     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1332     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1333   }
1334   /* scale  the diagonal block */
1335   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1336 
1337   if (rr) {
1338     /* Do a scatter end and then right scale the off-diagonal block */
1339     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1340     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1341   }
1342 
1343   PetscFunctionReturn(0);
1344 }
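
/*
   For reference, the scaling above applies mat <- diag(ll) * mat * diag(rr)
   blockwise: ll scales the rows of both the diagonal block A and the off-diagonal
   block B locally, the locally owned part of rr scales the columns of A, and the
   ghosted entries of rr (gathered into aij->lvec while A is being scaled) scale
   the columns of B.
*/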
1345 
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1349 int MatPrintHelp_MPIAIJ(Mat A)
1350 {
1351   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1352   int        ierr;
1353 
1354   PetscFunctionBegin;
1355   if (!a->rank) {
1356     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1357   }
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1363 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1364 {
1365   PetscFunctionBegin;
1366   *bs = 1;
1367   PetscFunctionReturn(0);
1368 }
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1371 int MatSetUnfactored_MPIAIJ(Mat A)
1372 {
1373   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1374   int        ierr;
1375 
1376   PetscFunctionBegin;
1377   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatEqual_MPIAIJ"
1383 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1384 {
1385   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1386   Mat        a,b,c,d;
1387   PetscTruth flg;
1388   int        ierr;
1389 
1390   PetscFunctionBegin;
1391   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1392   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1393   a = matA->A; b = matA->B;
1394   c = matB->A; d = matB->B;
1395 
1396   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1397   if (flg == PETSC_TRUE) {
1398     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1399   }
1400   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
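
/*
   Usage sketch (assumes two assembled parallel matrices A and B of the same type):
   the per-process comparisons of the diagonal and off-diagonal blocks above are
   combined with a logical AND reduction across the communicator.

     PetscTruth eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/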
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatCopy_MPIAIJ"
1406 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1407 {
1408   int        ierr;
1409   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1410   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1411   PetscTruth flg;
1412 
1413   PetscFunctionBegin;
1414   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1415   if (str != SAME_NONZERO_PATTERN || !flg) {
1416     /* because of the column compression in the off-processor part of the matrix a->B,
1417        the number of columns in a->B and b->B may be different, hence we cannot call
1418        MatCopy() directly on the two parts. If need be, we can provide a copy more
1419        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1420        then copying the submatrices */
1421     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1422   } else {
1423     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1424     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1425   }
1426   PetscFunctionReturn(0);
1427 }
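
/*
   Usage sketch (assumes assembled parallel matrices A and B with compatible sizes):
   when the two matrices are known to share the same nonzero pattern the faster
   block-wise path above is taken, otherwise MatCopy_Basic() is used.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/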
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1431 int MatSetUpPreallocation_MPIAIJ(Mat A)
1432 {
1433   int        ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1441 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1442 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1443 EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1444 EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1445 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1446 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1447 #endif
1448 
1449 /* -------------------------------------------------------------------*/
1450 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1451        MatGetRow_MPIAIJ,
1452        MatRestoreRow_MPIAIJ,
1453        MatMult_MPIAIJ,
1454        MatMultAdd_MPIAIJ,
1455        MatMultTranspose_MPIAIJ,
1456        MatMultTransposeAdd_MPIAIJ,
1457        0,
1458        0,
1459        0,
1460        0,
1461        0,
1462        0,
1463        MatRelax_MPIAIJ,
1464        MatTranspose_MPIAIJ,
1465        MatGetInfo_MPIAIJ,
1466        MatEqual_MPIAIJ,
1467        MatGetDiagonal_MPIAIJ,
1468        MatDiagonalScale_MPIAIJ,
1469        MatNorm_MPIAIJ,
1470        MatAssemblyBegin_MPIAIJ,
1471        MatAssemblyEnd_MPIAIJ,
1472        0,
1473        MatSetOption_MPIAIJ,
1474        MatZeroEntries_MPIAIJ,
1475        MatZeroRows_MPIAIJ,
1476 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1477        MatLUFactorSymbolic_MPIAIJ_TFS,
1478 #else
1479        0,
1480 #endif
1481        0,
1482        0,
1483        0,
1484        MatSetUpPreallocation_MPIAIJ,
1485        0,
1486        MatGetOwnershipRange_MPIAIJ,
1487        0,
1488        0,
1489        0,
1490        0,
1491        MatDuplicate_MPIAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496        0,
1497        MatGetSubMatrices_MPIAIJ,
1498        MatIncreaseOverlap_MPIAIJ,
1499        MatGetValues_MPIAIJ,
1500        MatCopy_MPIAIJ,
1501        MatPrintHelp_MPIAIJ,
1502        MatScale_MPIAIJ,
1503        0,
1504        0,
1505        0,
1506        MatGetBlockSize_MPIAIJ,
1507        0,
1508        0,
1509        0,
1510        0,
1511        MatFDColoringCreate_MPIAIJ,
1512        0,
1513        MatSetUnfactored_MPIAIJ,
1514        0,
1515        0,
1516        MatGetSubMatrix_MPIAIJ,
1517        MatDestroy_MPIAIJ,
1518        MatView_MPIAIJ,
1519        MatGetPetscMaps_Petsc,
1520        0,
1521        0,
1522        0,
1523        0,
1524        0,
1525        0,
1526        0,
1527        0,
1528        MatSetColoring_MPIAIJ,
1529        MatSetValuesAdic_MPIAIJ,
1530        MatSetValuesAdifor_MPIAIJ
1531 };
1532 
1533 /* ----------------------------------------------------------------------------------------*/
1534 
1535 EXTERN_C_BEGIN
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1538 int MatStoreValues_MPIAIJ(Mat mat)
1539 {
1540   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1541   int        ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1545   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 EXTERN_C_END
1549 
1550 EXTERN_C_BEGIN
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1553 int MatRetrieveValues_MPIAIJ(Mat mat)
1554 {
1555   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1556   int        ierr;
1557 
1558   PetscFunctionBegin;
1559   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1560   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 EXTERN_C_END
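
/*
   Usage sketch (assumes an assembled matrix mat whose nonzero pattern is reused
   across repeated fills; MatStoreValues() requires that no new nonzero locations
   be introduced):

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ...  refill the matrix with MatSetValues()/assembly as needed ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/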
1564 
1565 #include "petscpc.h"
1566 EXTERN_C_BEGIN
1567 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1568 EXTERN_C_END
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatCreate_MPIAIJ"
1573 int MatCreate_MPIAIJ(Mat B)
1574 {
1575   Mat_MPIAIJ   *b;
1576   int          ierr,i,size;
1577 
1578   PetscFunctionBegin;
1579   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1580 
1581   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1582   B->data         = (void*)b;
1583   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1584   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1585   B->factor       = 0;
1586   B->assembled    = PETSC_FALSE;
1587   B->mapping      = 0;
1588 
1589   B->insertmode      = NOT_SET_VALUES;
1590   b->size            = size;
1591   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1592 
1593   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1594   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1595 
1596   /* the information in the maps duplicates the information computed below;
1597      eventually we should remove the duplicate information that is not contained in the maps */
1598   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1599   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1600 
1601   /* build local table of row and column ownerships */
1602   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1603   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1604   b->cowners = b->rowners + b->size + 2;
1605   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1606   b->rowners[0] = 0;
1607   for (i=2; i<=b->size; i++) {
1608     b->rowners[i] += b->rowners[i-1];
1609   }
1610   b->rstart = b->rowners[b->rank];
1611   b->rend   = b->rowners[b->rank+1];
1612   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1613   b->cowners[0] = 0;
1614   for (i=2; i<=b->size; i++) {
1615     b->cowners[i] += b->cowners[i-1];
1616   }
1617   b->cstart = b->cowners[b->rank];
1618   b->cend   = b->cowners[b->rank+1];
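  /* at this point rowners/cowners hold exclusive prefix sums of the per-process local
     sizes, e.g. with three processes owning 3,3,2 rows rowners = {0,3,6,8}; this
     process owns rows [rstart,rend) = [rowners[rank],rowners[rank+1]) */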
1619 
1620   /* build cache for off array entries formed */
1621   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1622   b->donotstash  = PETSC_FALSE;
1623   b->colmap      = 0;
1624   b->garray      = 0;
1625   b->roworiented = PETSC_TRUE;
1626 
1627   /* stuff used for matrix vector multiply */
1628   b->lvec      = PETSC_NULL;
1629   b->Mvctx     = PETSC_NULL;
1630 
1631   /* stuff for MatGetRow() */
1632   b->rowindices   = 0;
1633   b->rowvalues    = 0;
1634   b->getrowactive = PETSC_FALSE;
1635 
1636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1637                                      "MatStoreValues_MPIAIJ",
1638                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1640                                      "MatRetrieveValues_MPIAIJ",
1641                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1643                                      "MatGetDiagonalBlock_MPIAIJ",
1644                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1645   PetscFunctionReturn(0);
1646 }
1647 EXTERN_C_END
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1651 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1652 {
1653   Mat        mat;
1654   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1655   int        ierr;
1656 
1657   PetscFunctionBegin;
1658   *newmat       = 0;
1659   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1660   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1661   a    = (Mat_MPIAIJ*)mat->data;
1662   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1663   mat->factor       = matin->factor;
1664   mat->assembled    = PETSC_TRUE;
1665   mat->insertmode   = NOT_SET_VALUES;
1666   mat->preallocated = PETSC_TRUE;
1667 
1668   a->rstart       = oldmat->rstart;
1669   a->rend         = oldmat->rend;
1670   a->cstart       = oldmat->cstart;
1671   a->cend         = oldmat->cend;
1672   a->size         = oldmat->size;
1673   a->rank         = oldmat->rank;
1674   a->donotstash   = oldmat->donotstash;
1675   a->roworiented  = oldmat->roworiented;
1676   a->rowindices   = 0;
1677   a->rowvalues    = 0;
1678   a->getrowactive = PETSC_FALSE;
1679 
1680   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1681   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1682   if (oldmat->colmap) {
1683 #if defined (PETSC_USE_CTABLE)
1684     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1685 #else
1686     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1687     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1688     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1689 #endif
1690   } else a->colmap = 0;
1691   if (oldmat->garray) {
1692     int len;
1693     len  = oldmat->B->n;
1694     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1695     PetscLogObjectMemory(mat,len*sizeof(int));
1696     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1697   } else a->garray = 0;
1698 
1699   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1700   PetscLogObjectParent(mat,a->lvec);
1701   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1702   PetscLogObjectParent(mat,a->Mvctx);
1703   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1704   PetscLogObjectParent(mat,a->A);
1705   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1706   PetscLogObjectParent(mat,a->B);
1707   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1708   *newmat = mat;
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #include "petscsys.h"
1713 
1714 EXTERN_C_BEGIN
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatLoad_MPIAIJ"
1717 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1718 {
1719   Mat          A;
1720   PetscScalar  *vals,*svals;
1721   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1722   MPI_Status   status;
1723   int          i,nz,ierr,j,rstart,rend,fd;
1724   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1725   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1726   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1727 
1728   PetscFunctionBegin;
1729   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1730   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1731   if (!rank) {
1732     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1733     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1734     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1735     if (header[3] < 0) {
1736       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1737     }
1738   }
1739 
1740   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1741   M = header[1]; N = header[2];
1742   /* determine ownership of all rows */
1743   m = M/size + ((M % size) > rank);
1744   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1745   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1746   rowners[0] = 0;
1747   for (i=2; i<=size; i++) {
1748     rowners[i] += rowners[i-1];
1749   }
1750   rstart = rowners[rank];
1751   rend   = rowners[rank+1];
1752 
1753   /* distribute row lengths to all processors */
1754   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1755   offlens = ourlens + (rend-rstart);
1756   if (!rank) {
1757     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1758     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1759     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1760     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1761     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1762     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1763   } else {
1764     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1765   }
1766 
1767   if (!rank) {
1768     /* calculate the number of nonzeros on each processor */
1769     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1770     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1771     for (i=0; i<size; i++) {
1772       for (j=rowners[i]; j< rowners[i+1]; j++) {
1773         procsnz[i] += rowlengths[j];
1774       }
1775     }
1776     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1777 
1778     /* determine max buffer needed and allocate it */
1779     maxnz = 0;
1780     for (i=0; i<size; i++) {
1781       maxnz = PetscMax(maxnz,procsnz[i]);
1782     }
1783     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1784 
1785     /* read in my part of the matrix column indices  */
1786     nz   = procsnz[0];
1787     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1788     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1789 
1790     /* read in everyone else's part and ship it off */
1791     for (i=1; i<size; i++) {
1792       nz   = procsnz[i];
1793       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1794       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1795     }
1796     ierr = PetscFree(cols);CHKERRQ(ierr);
1797   } else {
1798     /* determine buffer space needed for message */
1799     nz = 0;
1800     for (i=0; i<m; i++) {
1801       nz += ourlens[i];
1802     }
1803     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1804 
1805     /* receive message of column indices */
1806     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1807     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1808     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1809   }
1810 
1811   /* determine column ownership if matrix is not square */
1812   if (N != M) {
1813     n      = N/size + ((N % size) > rank);
1814     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1815     cstart = cend - n;
1816   } else {
1817     cstart = rstart;
1818     cend   = rend;
1819     n      = cend - cstart;
1820   }
1821 
1822   /* loop over local rows, determining number of off diagonal entries */
1823   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1824   jj = 0;
1825   for (i=0; i<m; i++) {
1826     for (j=0; j<ourlens[i]; j++) {
1827       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1828       jj++;
1829     }
1830   }
1831 
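  /* ourlens is temporarily reduced to the diagonal-block row lengths so that the
     preallocation in MatCreateMPIAIJ() below is exact; it is restored to the full
     row lengths right after creation since the insertion loops need the totals */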
1832   /* create our matrix */
1833   for (i=0; i<m; i++) {
1834     ourlens[i] -= offlens[i];
1835   }
1836   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1837   A = *newmat;
1838   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1839   for (i=0; i<m; i++) {
1840     ourlens[i] += offlens[i];
1841   }
1842 
1843   if (!rank) {
1844     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1845 
1846     /* read in my part of the matrix numerical values  */
1847     nz   = procsnz[0];
1848     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1849 
1850     /* insert into matrix */
1851     jj      = rstart;
1852     smycols = mycols;
1853     svals   = vals;
1854     for (i=0; i<m; i++) {
1855       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1856       smycols += ourlens[i];
1857       svals   += ourlens[i];
1858       jj++;
1859     }
1860 
1861     /* read in the other processors' values and ship them out */
1862     for (i=1; i<size; i++) {
1863       nz   = procsnz[i];
1864       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1865       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1866     }
1867     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1868   } else {
1869     /* receive numeric values */
1870     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1871 
1872     /* receive message of values */
1873     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1874     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1875     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1876 
1877     /* insert into matrix */
1878     jj      = rstart;
1879     smycols = mycols;
1880     svals   = vals;
1881     for (i=0; i<m; i++) {
1882       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1883       smycols += ourlens[i];
1884       svals   += ourlens[i];
1885       jj++;
1886     }
1887   }
1888   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1889   ierr = PetscFree(vals);CHKERRQ(ierr);
1890   ierr = PetscFree(mycols);CHKERRQ(ierr);
1891   ierr = PetscFree(rowners);CHKERRQ(ierr);
1892 
1893   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1894   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 EXTERN_C_END
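
/*
   Usage sketch (hypothetical file name; the binary-viewer open-mode flag has varied
   across PETSc versions): load a previously dumped matrix as an MPIAIJ matrix.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/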
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1901 /*
1902     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1903   matrix and then the final result built by concatenating the local matrices.
1904   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1905 */
1906 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1907 {
1908   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1909   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1910   Mat          *local,M,Mreuse;
1911   PetscScalar  *vwork,*aa;
1912   MPI_Comm     comm = mat->comm;
1913   Mat_SeqAIJ   *aij;
1914 
1915 
1916   PetscFunctionBegin;
1917   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1918   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1919 
1920   if (call ==  MAT_REUSE_MATRIX) {
1921     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1922     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1923     local = &Mreuse;
1924     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1925   } else {
1926     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1927     Mreuse = *local;
1928     ierr   = PetscFree(local);CHKERRQ(ierr);
1929   }
1930 
1931   /*
1932       m - number of local rows
1933       n - number of columns (same on all processors)
1934       rstart - first row in new global matrix generated
1935   */
1936   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1937   if (call == MAT_INITIAL_MATRIX) {
1938     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1939     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1940     ii  = aij->i;
1941     jj  = aij->j;
1942 
1943     /*
1944         Determine the number of non-zeros in the diagonal and off-diagonal
1945         portions of the matrix in order to do correct preallocation
1946     */
1947 
1948     /* first get start and end of "diagonal" columns */
1949     if (csize == PETSC_DECIDE) {
1950       nlocal = n/size + ((n % size) > rank);
1951     } else {
1952       nlocal = csize;
1953     }
1954     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1955     rstart = rend - nlocal;
1956     if (rank == size - 1 && rend != n) {
1957       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1958     }
1959 
1960     /* next, compute all the lengths */
1961     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1962     olens = dlens + m;
1963     for (i=0; i<m; i++) {
1964       jend = ii[i+1] - ii[i];
1965       olen = 0;
1966       dlen = 0;
1967       for (j=0; j<jend; j++) {
1968         if (*jj < rstart || *jj >= rend) olen++;
1969         else dlen++;
1970         jj++;
1971       }
1972       olens[i] = olen;
1973       dlens[i] = dlen;
1974     }
1975     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1976     ierr = PetscFree(dlens);CHKERRQ(ierr);
1977   } else {
1978     int ml,nl;
1979 
1980     M = *newmat;
1981     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1982     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1983     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1984     /*
1985          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1986        rather than the slower MatSetValues().
1987     */
1988     M->was_assembled = PETSC_TRUE;
1989     M->assembled     = PETSC_FALSE;
1990   }
1991   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1992   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1993   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1994   ii  = aij->i;
1995   jj  = aij->j;
1996   aa  = aij->a;
1997   for (i=0; i<m; i++) {
1998     row   = rstart + i;
1999     nz    = ii[i+1] - ii[i];
2000     cwork = jj;     jj += nz;
2001     vwork = aa;     aa += nz;
2002     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2003   }
2004 
2005   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2007   *newmat = M;
2008 
2009   /* save the submatrix used on this processor for the next reuse request */
2010   if (call ==  MAT_INITIAL_MATRIX) {
2011     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2012     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2013   }
2014 
2015   PetscFunctionReturn(0);
2016 }
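
/*
   Usage sketch (assumes an assembled parallel matrix mat and index sets isrow/iscol
   created elsewhere): the first call builds the submatrix, later calls with
   MAT_REUSE_MATRIX refill the same matrix via the "SubMatrix" object composed above.

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/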
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2020 /*@C
2021    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in
2022    AIJ format (the default parallel PETSc format).  For good matrix assembly performance
2023    the user should preallocate the matrix storage by setting the parameters
2024    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2025    performance can be increased by more than a factor of 50.
2026 
2027    Collective on MPI_Comm
2028 
2029    Input Parameters:
2030 +  A - the matrix
2031 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2032            (same value is used for all local rows)
2033 .  d_nnz - array containing the number of nonzeros in the various rows of the
2034            DIAGONAL portion of the local submatrix (possibly different for each row)
2035            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2036            The size of this array is equal to the number of local rows, i.e 'm'.
2037            You must leave room for the diagonal entry even if it is zero.
2038 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2039            submatrix (same value is used for all local rows).
2040 -  o_nnz - array containing the number of nonzeros in the various rows of the
2041            OFF-DIAGONAL portion of the local submatrix (possibly different for
2042            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2043            structure. The size of this array is equal to the number
2044            of local rows, i.e 'm'.
2045 
2046    The AIJ format (also called the Yale sparse matrix format or
2047    compressed row storage) is fully compatible with standard Fortran 77
2048    storage.  That is, the stored row and column indices can begin at
2049    either one (as in Fortran) or zero.  See the users manual for details.
2050 
2051    The user MUST specify either the local or global matrix dimensions
2052    (possibly both).
2053 
2054    The parallel matrix is partitioned such that the first m0 rows belong to
2055    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2056    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2057 
2058    The DIAGONAL portion of the local submatrix of a processor can be defined
2059    as the submatrix obtained by extracting the part corresponding
2060    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2061    first row that belongs to the processor, and r2 is the last row belonging
2062    to this processor. This is a square mxm matrix. The remaining portion
2063    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2064 
2065    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2066 
2067    By default, this format uses inodes (identical nodes) when possible.
2068    We search for consecutive rows with the same nonzero structure, thereby
2069    reusing matrix information to achieve increased efficiency.
2070 
2071    Options Database Keys:
2072 +  -mat_aij_no_inode  - Do not use inodes
2073 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2074 -  -mat_aij_oneindex - Internally use indexing starting at 1
2075         rather than 0.  Note that when calling MatSetValues(),
2076         the user still MUST index entries starting at 0!
2077 
2078    Example usage:
2079 
2080    Consider the following 8x8 matrix with 34 non-zero values that is
2081    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2082    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2083    as follows:
2084 
2085 .vb
2086             1  2  0  |  0  3  0  |  0  4
2087     Proc0   0  5  6  |  7  0  0  |  8  0
2088             9  0 10  | 11  0  0  | 12  0
2089     -------------------------------------
2090            13  0 14  | 15 16 17  |  0  0
2091     Proc1   0 18  0  | 19 20 21  |  0  0
2092             0  0  0  | 22 23  0  | 24  0
2093     -------------------------------------
2094     Proc2  25 26 27  |  0  0 28  | 29  0
2095            30  0  0  | 31 32 33  |  0 34
2096 .ve
2097 
2098    This can be represented as a collection of submatrices as:
2099 
2100 .vb
2101       A B C
2102       D E F
2103       G H I
2104 .ve
2105 
2106    Where the submatrices A,B,C are owned by proc0, D,E,F are
2107    owned by proc1, G,H,I are owned by proc2.
2108 
2109    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2110    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2111    The 'M','N' parameters are 8,8, and have the same values on all procs.
2112 
2113    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2114    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2115    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2116    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2117    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2118    matrix, and [DF] as another SeqAIJ matrix.
2119 
2120    When d_nz, o_nz parameters are specified, d_nz storage elements are
2121    allocated for every row of the local diagonal submatrix, and o_nz
2122    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2123    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2124    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2125    In this case, the values of d_nz,o_nz are:
2126 .vb
2127      proc0 : d_nz = 2, o_nz = 2
2128      proc1 : d_nz = 3, o_nz = 2
2129      proc2 : d_nz = 1, o_nz = 4
2130 .ve
2131    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2132    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2133    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2134    34 values.
2135 
2136    When d_nnz, o_nnz parameters are specified, the storage is specified
2137    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2138    In the above case the values for d_nnz,o_nnz are:
2139 .vb
2140      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2141      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2142      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2143 .ve
2144    Here the space allocated is the sum of all the above values, i.e., 34, and
2145    hence the pre-allocation is exact.
2146 
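   As a concrete sketch (the variable names are illustrative; the matrix B is assumed
   to have already been created with MatCreate() and MatSetType(B,MATMPIAIJ) with a
   local row count of 3), proc1 in the example above could preallocate with
.vb
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Because d_nnz and o_nnz are given, the scalar d_nz and o_nz arguments (here 0)
   are ignored.
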
2147    Level: intermediate
2148 
2149 .keywords: matrix, aij, compressed row, sparse, parallel
2150 
2151 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2152 @*/
2153 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2154 {
2155   Mat_MPIAIJ   *b;
2156   int          ierr,i;
2157   PetscTruth   flg2;
2158 
2159   PetscFunctionBegin;
2160   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2161   if (!flg2) PetscFunctionReturn(0);
2162   B->preallocated = PETSC_TRUE;
2163   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2164   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2165   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2166   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2167   if (d_nnz) {
2168     for (i=0; i<B->m; i++) {
2169       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2170     }
2171   }
2172   if (o_nnz) {
2173     for (i=0; i<B->m; i++) {
2174       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2175     }
2176   }
2177   b = (Mat_MPIAIJ*)B->data;
2178 
2179   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2180   PetscLogObjectParent(B,b->A);
2181   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2182   PetscLogObjectParent(B,b->B);
2183 
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatCreateMPIAIJ"
2189 /*@C
2190    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2191    (the default parallel PETSc format).  For good matrix assembly performance
2192    the user should preallocate the matrix storage by setting the parameters
2193    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2194    performance can be increased by more than a factor of 50.
2195 
2196    Collective on MPI_Comm
2197 
2198    Input Parameters:
2199 +  comm - MPI communicator
2200 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2201            This value should be the same as the local size used in creating the
2202            y vector for the matrix-vector product y = Ax.
2203 .  n - This value should be the same as the local size used in creating the
2204        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2205        calculated if N is given) For square matrices n is almost always m.
2206 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2207 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2208 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2209            (same value is used for all local rows)
2210 .  d_nnz - array containing the number of nonzeros in the various rows of the
2211            DIAGONAL portion of the local submatrix (possibly different for each row)
2212            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2213            The size of this array is equal to the number of local rows, i.e 'm'.
2214            You must leave room for the diagonal entry even if it is zero.
2215 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2216            submatrix (same value is used for all local rows).
2217 -  o_nnz - array containing the number of nonzeros in the various rows of the
2218            OFF-DIAGONAL portion of the local submatrix (possibly different for
2219            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2220            structure. The size of this array is equal to the number
2221            of local rows, i.e 'm'.
2222 
2223    Output Parameter:
2224 .  A - the matrix
2225 
2226    Notes:
2227    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2228    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2229    storage requirements for this matrix.
2230 
2231    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2232    processor then it must be used on all processors that share the object for
2233    that argument.
2234 
2235    The AIJ format (also called the Yale sparse matrix format or
2236    compressed row storage) is fully compatible with standard Fortran 77
2237    storage.  That is, the stored row and column indices can begin at
2238    either one (as in Fortran) or zero.  See the users manual for details.
2239 
2240    The user MUST specify either the local or global matrix dimensions
2241    (possibly both).
2242 
2243    The parallel matrix is partitioned such that the first m0 rows belong to
2244    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2245    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2246 
2247    The DIAGONAL portion of the local submatrix of a processor can be defined
2248    as the submatrix obtained by extracting the part corresponding
2249    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2250    first row that belongs to the processor, and r2 is the last row belonging
2251    to this processor. This is a square mxm matrix. The remaining portion
2252    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2253 
2254    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2255 
2256    By default, this format uses inodes (identical nodes) when possible.
2257    We search for consecutive rows with the same nonzero structure, thereby
2258    reusing matrix information to achieve increased efficiency.
2259 
2260    Options Database Keys:
2261 +  -mat_aij_no_inode  - Do not use inodes
2262 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2263 -  -mat_aij_oneindex - Internally use indexing starting at 1
2264         rather than 0.  Note that when calling MatSetValues(),
2265         the user still MUST index entries starting at 0!
2266 
2267 
2268    Example usage:
2269 
2270    Consider the following 8x8 matrix with 34 non-zero values that is
2271    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2272    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2273    as follows:
2274 
2275 .vb
2276             1  2  0  |  0  3  0  |  0  4
2277     Proc0   0  5  6  |  7  0  0  |  8  0
2278             9  0 10  | 11  0  0  | 12  0
2279     -------------------------------------
2280            13  0 14  | 15 16 17  |  0  0
2281     Proc1   0 18  0  | 19 20 21  |  0  0
2282             0  0  0  | 22 23  0  | 24  0
2283     -------------------------------------
2284     Proc2  25 26 27  |  0  0 28  | 29  0
2285            30  0  0  | 31 32 33  |  0 34
2286 .ve
2287 
2288    This can be represented as a collection of submatrices as:
2289 
2290 .vb
2291       A B C
2292       D E F
2293       G H I
2294 .ve
2295 
2296    Where the submatrices A,B,C are owned by proc0, D,E,F are
2297    owned by proc1, G,H,I are owned by proc2.
2298 
2299    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2300    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2301    The 'M','N' parameters are 8,8, and have the same values on all procs.
2302 
2303    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2304    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2305    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2306    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2307    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2308    matrix, and [DF] as another SeqAIJ matrix.
2309 
2310    When d_nz, o_nz parameters are specified, d_nz storage elements are
2311    allocated for every row of the local diagonal submatrix, and o_nz
2312    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2313    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2314    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2315    In this case, the values of d_nz,o_nz are:
2316 .vb
2317      proc0 : d_nz = 2, o_nz = 2
2318      proc1 : d_nz = 3, o_nz = 2
2319      proc2 : d_nz = 1, o_nz = 4
2320 .ve
2321    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2322    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2323    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2324    34 values.
2325 
2326    When d_nnz, o_nnz parameters are specified, the storage is specified
2327    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2328    In the above case the values for d_nnz,o_nnz are:
2329 .vb
2330      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2331      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2332      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2333 .ve
2334    Here the space allocated is the sum of all the above values, i.e., 34, and
2335    hence the pre-allocation is exact.
2336 
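   As a concrete sketch (illustrative variable names), proc1 in the example above,
   which owns 3 rows and 3 "diagonal" columns of the 8x8 matrix, could create the
   matrix with
.vb
     Mat A;
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Because d_nnz and o_nnz are given, the scalar d_nz and o_nz arguments (here 0)
   are ignored.
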
2337    Level: intermediate
2338 
2339 .keywords: matrix, aij, compressed row, sparse, parallel
2340 
2341 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2342 @*/
2343 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2344 {
2345   int ierr,size;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2349   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2350   if (size > 1) {
2351     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2352     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2353   } else {
2354     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2355     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2356   }
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2362 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2363 {
2364   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2365   PetscFunctionBegin;
2366   *Ad     = a->A;
2367   *Ao     = a->B;
2368   *colmap = a->garray;
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2374 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2375 {
2376   int        ierr;
2377   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2378 
2379   PetscFunctionBegin;
2380   if (coloring->ctype == IS_COLORING_LOCAL) {
2381     int        *allcolors,*colors,i;
2382     ISColoring ocoloring;
2383 
2384     /* set coloring for diagonal portion */
2385     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2386 
2387     /* set coloring for off-diagonal portion */
2388     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2389     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2390     for (i=0; i<a->B->n; i++) {
2391       colors[i] = allcolors[a->garray[i]];
2392     }
2393     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2394     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2395     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2396     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2397   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2398     int        *colors,i,*larray;
2399     ISColoring ocoloring;
2400 
2401     /* set coloring for diagonal portion */
2402     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2403     for (i=0; i<a->A->n; i++) {
2404       larray[i] = i + a->cstart;
2405     }
2406     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2407     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2408     for (i=0; i<a->A->n; i++) {
2409       colors[i] = coloring->colors[larray[i]];
2410     }
2411     ierr = PetscFree(larray);CHKERRQ(ierr);
2412     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2413     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2414     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2415 
2416     /* set coloring for off-diagonal portion */
2417     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2418     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2419     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2420     for (i=0; i<a->B->n; i++) {
2421       colors[i] = coloring->colors[larray[i]];
2422     }
2423     ierr = PetscFree(larray);CHKERRQ(ierr);
2424     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2425     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2426     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2427   } else {
2428     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2429   }
2430 
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2436 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2437 {
2438   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2439   int        ierr;
2440 
2441   PetscFunctionBegin;
2442   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2443   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2449 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2450 {
2451   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2452   int        ierr;
2453 
2454   PetscFunctionBegin;
2455   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2456   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459