xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 329f5518e9d4bb7ce96c0c5576cc53785c973973)
1 /*$Id: mpiaij.c,v 1.308 1999/11/24 21:53:50 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 extern int MatSetUpMultiply_MPIAIJ(Mat);
8 extern int DisAssemble_MPIAIJ(Mat);
9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 extern int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it it is not scalable (each processor
19 has an order N integer array but is fast to acess.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int*)PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=0; _i<nrow; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<a->m+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=0; _i<nrow; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<b->m+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
198   Scalar     value;
199   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int        cstart = aij->cstart,cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
206   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208   PetscTruth ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE:PETSC_FALSE);
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
211   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         }
234         else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_BOPT_g)
236         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!aij->colmap) {
241               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined (PETSC_USE_CTABLE)
244             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
245 	    col--;
246 #else
247             col = aij->colmap[in[j]] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
250               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
251               col =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
253               B = aij->B;
254               b = (Mat_SeqAIJ*)B->data;
255               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
256               ba = b->a;
257             }
258           } else col = in[j];
259           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
260           if (ignorezeroentries && value == 0.0) continue;
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           if (ignorezeroentries && v[i*n] == 0.0) continue;
269           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
270         } else {
271           if (ignorezeroentries && v[i] == 0.0) continue;
272           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
273         }
274       }
275     }
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNC__
281 #define __FUNC__ "MatGetValues_MPIAIJ"
282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
283 {
284   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
285   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
286   int        cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
291     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
296         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col --;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNC__
324 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
328   int         ierr,nstash,reallocs;
329   InsertMode  addv;
330 
331   PetscFunctionBegin;
332   if (aij->donotstash) {
333     PetscFunctionReturn(0);
334   }
335 
336   /* make sure all processors are either in INSERTMODE or ADDMODE */
337   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
338   if (addv == (ADD_VALUES|INSERT_VALUES)) {
339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
340   }
341   mat->insertmode = addv; /* in case this processor had no cache */
342 
343   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
344   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
345   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
346   PetscFunctionReturn(0);
347 }
348 
349 
350 #undef __FUNC__
351 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
353 {
354   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
355   int         i,j,rstart,ncols,n,ierr,flg;
356   int         *row,*col,other_disassembled;
357   Scalar      *val;
358   InsertMode  addv = mat->insertmode;
359 
360   PetscFunctionBegin;
361   if (!aij->donotstash) {
362     while (1) {
363       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
364       if (!flg) break;
365 
366       for (i=0; i<n;) {
367         /* Now identify the consecutive vals belonging to the same row */
368         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
369         if (j < n) ncols = j-i;
370         else       ncols = n-i;
371         /* Now assemble all these values with a single function call */
372         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
373         i = j;
374       }
375     }
376     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
377   }
378 
379   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
380   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
381 
382   /* determine if any processor has disassembled, if so we must
383      also disassemble ourselfs, in order that we may reassemble. */
384   /*
385      if nonzero structure of submatrix B cannot change then we know that
386      no processor disassembled thus we can skip this stuff
387   */
388   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
389     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
390     if (mat->was_assembled && !other_disassembled) {
391       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
392     }
393   }
394 
395   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
396     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
397   }
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatZeroEntries_MPIAIJ"
410 int MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   int        ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNC__
422 #define __FUNC__ "MatZeroRows_MPIAIJ"
423 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
427   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
428   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
429   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
430   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
431   MPI_Comm       comm = A->comm;
432   MPI_Request    *send_waits,*recv_waits;
433   MPI_Status     recv_status,*send_status;
434   IS             istmp;
435 
436   PetscFunctionBegin;
437   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
438   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
439 
440   /*  first count number of contributors to each processor */
441   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
442   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
443   procs  = nprocs + size;
444   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
445   for (i=0; i<N; i++) {
446     idx = rows[i];
447     found = 0;
448     for (j=0; j<size; j++) {
449       if (idx >= owners[j] && idx < owners[j+1]) {
450         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
451       }
452     }
453     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
454   }
455   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
456 
457   /* inform other processors of number of messages and max length*/
458   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
459   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
460   nrecvs = work[size+rank];
461   nmax   = work[rank];
462   ierr   = PetscFree(work);CHKERRQ(ierr);
463 
464   /* post receives:   */
465   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
466   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
467   for (i=0; i<nrecvs; i++) {
468     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
476   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
477   starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
478   starts[0] = 0;
479   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
480   for (i=0; i<N; i++) {
481     svalues[starts[owner[i]]++] = rows[i];
482   }
483   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
484 
485   starts[0] = 0;
486   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   count = 0;
488   for (i=0; i<size; i++) {
489     if (procs[i]) {
490       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
491     }
492   }
493   ierr = PetscFree(starts);CHKERRQ(ierr);
494 
495   base = owners[rank];
496 
497   /*  wait on receives */
498   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
499   source = lens + nrecvs;
500   count  = nrecvs; slen = 0;
501   while (count) {
502     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
503     /* unpack receives into our local space */
504     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
505     source[imdex]  = recv_status.MPI_SOURCE;
506     lens[imdex]    = n;
507     slen          += n;
508     count--;
509   }
510   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
511 
512   /* move the data into the send scatter */
513   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
514   count = 0;
515   for (i=0; i<nrecvs; i++) {
516     values = rvalues + i*nmax;
517     for (j=0; j<lens[i]; j++) {
518       lrows[count++] = values[j] - base;
519     }
520   }
521   ierr = PetscFree(rvalues);CHKERRQ(ierr);
522   ierr = PetscFree(lens);CHKERRQ(ierr);
523   ierr = PetscFree(owner);CHKERRQ(ierr);
524   ierr = PetscFree(nprocs);CHKERRQ(ierr);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PLogObjectParent(A,istmp);
529 
530   /*
531         Zero the required rows. If the "diagonal block" of the matrix
532      is square and the user wishes to set the diagonal we use seperate
533      code so that MatSetValues() is not called for each diagonal allocating
534      new memory, thus calling lots of mallocs and slowing things down.
535 
536        Contributed by: Mathew Knepley
537   */
538   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
539   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
540   if (diag && (l->A->M == l->A->N)) {
541     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
542   } else if (diag) {
543     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
544     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
545       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
547     }
548     for (i = 0; i < slen; i++) {
549       row  = lrows[i] + rstart;
550       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
551     }
552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554   } else {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556   }
557   ierr = ISDestroy(istmp);CHKERRQ(ierr);
558   ierr = PetscFree(lrows);CHKERRQ(ierr);
559 
560   /* wait on sends */
561   if (nsends) {
562     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
563     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
564     ierr        = PetscFree(send_status);CHKERRQ(ierr);
565   }
566   ierr = PetscFree(send_waits);CHKERRQ(ierr);
567   ierr = PetscFree(svalues);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNC__
573 #define __FUNC__ "MatMult_MPIAIJ"
574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
575 {
576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577   int        ierr,nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != a->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNC__
592 #define __FUNC__ "MatMultAdd_MPIAIJ"
593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNC__
607 #define __FUNC__ "MatMultTranspose_MPIAIJ"
608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   int        ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for my implementation*/
622   /* but is not perhaps always true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ "MatMultTransposeAdd_MPIAIJ"
629 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
630 {
631   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
632   int        ierr;
633 
634   PetscFunctionBegin;
635   /* do nondiagonal part */
636   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
637   /* send it on its way */
638   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639   /* do local part */
640   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
641   /* receive remote parts: note this assumes the values are not actually */
642   /* inserted in yy until the next line, which is true for my implementation*/
643   /* but is not perhaps always true. */
644   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 /*
649   This only works correctly for square matrices where the subblock A->A is the
650    diagonal block
651 */
652 #undef __FUNC__
653 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
654 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
655 {
656   int        ierr;
657   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
658 
659   PetscFunctionBegin;
660   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
661   if (a->rstart != a->cstart || a->rend != a->cend) {
662     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
663   }
664   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatScale_MPIAIJ"
670 int MatScale_MPIAIJ(Scalar *aa,Mat A)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
673   int        ierr;
674 
675   PetscFunctionBegin;
676   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
677   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNC__
682 #define __FUNC__ "MatDestroy_MPIAIJ"
683 int MatDestroy_MPIAIJ(Mat mat)
684 {
685   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689 
690   if (mat->mapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
692   }
693   if (mat->bmapping) {
694     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
695   }
696   if (mat->rmap) {
697     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
698   }
699   if (mat->cmap) {
700     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
701   }
702 #if defined(PETSC_USE_LOG)
703   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
704 #endif
705   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
706   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
708   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
709 #if defined (PETSC_USE_CTABLE)
710   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
711 #else
712   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
713 #endif
714   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
715   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
716   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
717   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
718   ierr = PetscFree(aij);CHKERRQ(ierr);
719   PLogObjectDestroy(mat);
720   PetscHeaderDestroy(mat);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNC__
725 #define __FUNC__ "MatView_MPIAIJ_Binary"
726 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
727 {
728   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
729   int         ierr;
730 
731   PetscFunctionBegin;
732   if (aij->size == 1) {
733     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
734   }
735   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNC__
740 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
741 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
742 {
743   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
745   int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
746   PetscTruth  isdraw,isascii,flg;
747 
748   PetscFunctionBegin;
749   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
751   if (isascii) {
752     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
753     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
754       MatInfo info;
755       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
757       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
758       if (flg) {
759         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
760 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
761       } else {
762         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
763 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
764       }
765       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
766       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
767       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
768       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
769       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
770       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
771       PetscFunctionReturn(0);
772     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
773       PetscFunctionReturn(0);
774     }
775   } else if (isdraw) {
776     Draw       draw;
777     PetscTruth isnull;
778     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
779     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
780   }
781 
782   if (size == 1) {
783     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
784   } else {
785     /* assemble the entire matrix onto first processor. */
786     Mat         A;
787     Mat_SeqAIJ *Aloc;
788     int         M = aij->M,N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
789     Scalar      *a;
790 
791     if (!rank) {
792       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
793     } else {
794       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
795     }
796     PLogObjectParent(mat,A);
797 
798     /* copy over the A part */
799     Aloc = (Mat_SeqAIJ*)aij->A->data;
800     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
801     row = aij->rstart;
802     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
803     for (i=0; i<m; i++) {
804       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
805       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
806     }
807     aj = Aloc->j;
808     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
809 
810     /* copy over the B part */
811     Aloc = (Mat_SeqAIJ*)aij->B->data;
812     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
813     row = aij->rstart;
814     ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
815     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
816     for (i=0; i<m; i++) {
817       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
818       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
819     }
820     ierr = PetscFree(ct);CHKERRQ(ierr);
821     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     /*
824        Everyone has to call to draw the matrix since the graphics waits are
825        synchronized across all processors that share the Draw object
826     */
827     if (!rank) {
828       Viewer sviewer;
829       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
830       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
831       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
832     }
833     ierr = MatDestroy(A);CHKERRQ(ierr);
834   }
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNC__
839 #define __FUNC__ "MatView_MPIAIJ"
840 int MatView_MPIAIJ(Mat mat,Viewer viewer)
841 {
842   int        ierr;
843   PetscTruth isascii,isdraw,issocket,isbinary;
844 
845   PetscFunctionBegin;
846   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
847   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
848   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
849   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
850   if (isascii || isdraw || isbinary || issocket) {
851     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
852   } else {
853     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 /*
859     This has to provide several versions.
860 
861      2) a) use only local smoothing updating outer values only once.
862         b) local smoothing updating outer values each inner iteration
863      3) color updating out values betwen colors.
864 */
865 #undef __FUNC__
866 #define __FUNC__ "MatRelax_MPIAIJ"
867 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,
868                            PetscReal fshift,int its,Vec xx)
869 {
870   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
871   Mat        AA = mat->A,BB = mat->B;
872   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
873   Scalar     *b,*x,*xs,*ls,d,*v,sum;
874   int        ierr,*idx,*diag;
875   int        n = mat->n,m = mat->m,i,shift = A->indexshift;
876 
877   PetscFunctionBegin;
878   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
879   diag = A->diag;
880   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
881     if (flag & SOR_ZERO_INITIAL_GUESS) {
882       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
883       PetscFunctionReturn(0);
884     }
885     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
886     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
887     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888     if (xx != bb) {
889       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
890     } else {
891       b = x;
892     }
893     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
894     xs = x + shift; /* shift by one for index start of 1 */
895     ls = ls + shift;
896     while (its--) {
897       /* go down through the rows */
898       for (i=0; i<m; i++) {
899         n    = A->i[i+1] - A->i[i];
900 	PLogFlops(4*n+3);
901         idx  = A->j + A->i[i] + shift;
902         v    = A->a + A->i[i] + shift;
903         sum  = b[i];
904         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905         d    = fshift + A->a[diag[i]+shift];
906         n    = B->i[i+1] - B->i[i];
907         idx  = B->j + B->i[i] + shift;
908         v    = B->a + B->i[i] + shift;
909         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
911       }
912       /* come up through the rows */
913       for (i=m-1; i>-1; i--) {
914         n    = A->i[i+1] - A->i[i];
915 	PLogFlops(4*n+3)
916         idx  = A->j + A->i[i] + shift;
917         v    = A->a + A->i[i] + shift;
918         sum  = b[i];
919         SPARSEDENSEMDOT(sum,xs,v,idx,n);
920         d    = fshift + A->a[diag[i]+shift];
921         n    = B->i[i+1] - B->i[i];
922         idx  = B->j + B->i[i] + shift;
923         v    = B->a + B->i[i] + shift;
924         SPARSEDENSEMDOT(sum,ls,v,idx,n);
925         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
926       }
927     }
928     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
930     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
931   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
932     if (flag & SOR_ZERO_INITIAL_GUESS) {
933       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
934       PetscFunctionReturn(0);
935     }
936     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
937     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
938     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
939     if (xx != bb) {
940       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
941     } else {
942       b = x;
943     }
944     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
945     xs = x + shift; /* shift by one for index start of 1 */
946     ls = ls + shift;
947     while (its--) {
948       for (i=0; i<m; i++) {
949         n    = A->i[i+1] - A->i[i];
950 	PLogFlops(4*n+3);
951         idx  = A->j + A->i[i] + shift;
952         v    = A->a + A->i[i] + shift;
953         sum  = b[i];
954         SPARSEDENSEMDOT(sum,xs,v,idx,n);
955         d    = fshift + A->a[diag[i]+shift];
956         n    = B->i[i+1] - B->i[i];
957         idx  = B->j + B->i[i] + shift;
958         v    = B->a + B->i[i] + shift;
959         SPARSEDENSEMDOT(sum,ls,v,idx,n);
960         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
961       }
962     }
963     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
965     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
966   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
967     if (flag & SOR_ZERO_INITIAL_GUESS) {
968       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
969       PetscFunctionReturn(0);
970     }
971     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
972     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
973     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
974     if (xx != bb) {
975       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
976     } else {
977       b = x;
978     }
979     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
980     xs = x + shift; /* shift by one for index start of 1 */
981     ls = ls + shift;
982     while (its--) {
983       for (i=m-1; i>-1; i--) {
984         n    = A->i[i+1] - A->i[i];
985 	PLogFlops(4*n+3);
986         idx  = A->j + A->i[i] + shift;
987         v    = A->a + A->i[i] + shift;
988         sum  = b[i];
989         SPARSEDENSEMDOT(sum,xs,v,idx,n);
990         d    = fshift + A->a[diag[i]+shift];
991         n    = B->i[i+1] - B->i[i];
992         idx  = B->j + B->i[i] + shift;
993         v    = B->a + B->i[i] + shift;
994         SPARSEDENSEMDOT(sum,ls,v,idx,n);
995         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
996       }
997     }
998     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
999     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
1000     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1001   } else {
1002     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNC__
1008 #define __FUNC__ "MatGetInfo_MPIAIJ"
1009 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1010 {
1011   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1012   Mat        A = mat->A,B = mat->B;
1013   int        ierr;
1014   PetscReal  isend[5],irecv[5];
1015 
1016   PetscFunctionBegin;
1017   info->block_size     = 1.0;
1018   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1019   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1020   isend[3] = info->memory;  isend[4] = info->mallocs;
1021   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1022   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1023   isend[3] += info->memory;  isend[4] += info->mallocs;
1024   if (flag == MAT_LOCAL) {
1025     info->nz_used      = isend[0];
1026     info->nz_allocated = isend[1];
1027     info->nz_unneeded  = isend[2];
1028     info->memory       = isend[3];
1029     info->mallocs      = isend[4];
1030   } else if (flag == MAT_GLOBAL_MAX) {
1031     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1032     info->nz_used      = irecv[0];
1033     info->nz_allocated = irecv[1];
1034     info->nz_unneeded  = irecv[2];
1035     info->memory       = irecv[3];
1036     info->mallocs      = irecv[4];
1037   } else if (flag == MAT_GLOBAL_SUM) {
1038     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1039     info->nz_used      = irecv[0];
1040     info->nz_allocated = irecv[1];
1041     info->nz_unneeded  = irecv[2];
1042     info->memory       = irecv[3];
1043     info->mallocs      = irecv[4];
1044   }
1045   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1046   info->fill_ratio_needed = 0;
1047   info->factor_mallocs    = 0;
1048   info->rows_global       = (double)mat->M;
1049   info->columns_global    = (double)mat->N;
1050   info->rows_local        = (double)mat->m;
1051   info->columns_local     = (double)mat->N;
1052 
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNC__
1057 #define __FUNC__ "MatSetOption_MPIAIJ"
1058 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1061   int        ierr;
1062 
1063   PetscFunctionBegin;
1064   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1065       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1066       op == MAT_COLUMNS_UNSORTED ||
1067       op == MAT_COLUMNS_SORTED ||
1068       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1069       op == MAT_KEEP_ZEROED_ROWS ||
1070       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1071       op == MAT_IGNORE_ZERO_ENTRIES) {
1072         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1073         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1074   } else if (op == MAT_ROW_ORIENTED) {
1075     a->roworiented = PETSC_TRUE;
1076     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1077     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1078   } else if (op == MAT_ROWS_SORTED ||
1079              op == MAT_ROWS_UNSORTED ||
1080              op == MAT_SYMMETRIC ||
1081              op == MAT_STRUCTURALLY_SYMMETRIC ||
1082              op == MAT_YES_NEW_DIAGONALS) {
1083     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1084   } else if (op == MAT_COLUMN_ORIENTED) {
1085     a->roworiented = PETSC_FALSE;
1086     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1087     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1088   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1089     a->donotstash = PETSC_TRUE;
1090   } else if (op == MAT_NO_NEW_DIAGONALS){
1091     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1092   } else {
1093     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNC__
1099 #define __FUNC__ "MatGetSize_MPIAIJ"
1100 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1101 {
1102   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1103 
1104   PetscFunctionBegin;
1105   if (m) *m = mat->M;
1106   if (n) *n = mat->N;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNC__
1111 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1112 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1113 {
1114   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1115 
1116   PetscFunctionBegin;
1117   if (m) *m = mat->m;
1118   if (n) *n = mat->n;
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNC__
1123 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1124 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1125 {
1126   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1127 
1128   PetscFunctionBegin;
1129   *m = mat->rstart; *n = mat->rend;
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNC__
1134 #define __FUNC__ "MatGetRow_MPIAIJ"
1135 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1136 {
1137   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1138   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1139   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1140   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1141   int        *cmap,*idx_p;
1142 
1143   PetscFunctionBegin;
1144   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1145   mat->getrowactive = PETSC_TRUE;
1146 
1147   if (!mat->rowvalues && (idx || v)) {
1148     /*
1149         allocate enough space to hold information from the longest row.
1150     */
1151     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1152     int     max = 1,m = mat->m,tmp;
1153     for (i=0; i<m; i++) {
1154       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1155       if (max < tmp) { max = tmp; }
1156     }
1157     mat->rowvalues  = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1158     mat->rowindices = (int*)(mat->rowvalues + max);
1159   }
1160 
1161   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1162   lrow = row - rstart;
1163 
1164   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1165   if (!v)   {pvA = 0; pvB = 0;}
1166   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1167   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1168   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1169   nztot = nzA + nzB;
1170 
1171   cmap  = mat->garray;
1172   if (v  || idx) {
1173     if (nztot) {
1174       /* Sort by increasing column numbers, assuming A and B already sorted */
1175       int imark = -1;
1176       if (v) {
1177         *v = v_p = mat->rowvalues;
1178         for (i=0; i<nzB; i++) {
1179           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1180           else break;
1181         }
1182         imark = i;
1183         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1184         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1185       }
1186       if (idx) {
1187         *idx = idx_p = mat->rowindices;
1188         if (imark > -1) {
1189           for (i=0; i<imark; i++) {
1190             idx_p[i] = cmap[cworkB[i]];
1191           }
1192         } else {
1193           for (i=0; i<nzB; i++) {
1194             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1195             else break;
1196           }
1197           imark = i;
1198         }
1199         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1200         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1201       }
1202     } else {
1203       if (idx) *idx = 0;
1204       if (v)   *v   = 0;
1205     }
1206   }
1207   *nz = nztot;
1208   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNC__
1214 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1215 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1216 {
1217   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1218 
1219   PetscFunctionBegin;
1220   if (aij->getrowactive == PETSC_FALSE) {
1221     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1222   }
1223   aij->getrowactive = PETSC_FALSE;
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNC__
1228 #define __FUNC__ "MatNorm_MPIAIJ"
1229 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1230 {
1231   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1232   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1233   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1234   PetscReal  sum = 0.0;
1235   Scalar     *v;
1236 
1237   PetscFunctionBegin;
1238   if (aij->size == 1) {
1239     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1240   } else {
1241     if (type == NORM_FROBENIUS) {
1242       v = amat->a;
1243       for (i=0; i<amat->nz; i++) {
1244 #if defined(PETSC_USE_COMPLEX)
1245         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1246 #else
1247         sum += (*v)*(*v); v++;
1248 #endif
1249       }
1250       v = bmat->a;
1251       for (i=0; i<bmat->nz; i++) {
1252 #if defined(PETSC_USE_COMPLEX)
1253         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1254 #else
1255         sum += (*v)*(*v); v++;
1256 #endif
1257       }
1258       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1259       *norm = sqrt(*norm);
1260     } else if (type == NORM_1) { /* max column norm */
1261       PetscReal *tmp,*tmp2;
1262       int    *jj,*garray = aij->garray;
1263       tmp  = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp);
1264       tmp2 = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2);
1265       ierr = PetscMemzero(tmp,aij->N*sizeof(PetscReal));CHKERRQ(ierr);
1266       *norm = 0.0;
1267       v = amat->a; jj = amat->j;
1268       for (j=0; j<amat->nz; j++) {
1269         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1270       }
1271       v = bmat->a; jj = bmat->j;
1272       for (j=0; j<bmat->nz; j++) {
1273         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1274       }
1275       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1276       for (j=0; j<aij->N; j++) {
1277         if (tmp2[j] > *norm) *norm = tmp2[j];
1278       }
1279       ierr = PetscFree(tmp);CHKERRQ(ierr);
1280       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1281     } else if (type == NORM_INFINITY) { /* max row norm */
1282       PetscReal ntemp = 0.0;
1283       for (j=0; j<amat->m; j++) {
1284         v = amat->a + amat->i[j] + shift;
1285         sum = 0.0;
1286         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1287           sum += PetscAbsScalar(*v); v++;
1288         }
1289         v = bmat->a + bmat->i[j] + shift;
1290         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1291           sum += PetscAbsScalar(*v); v++;
1292         }
1293         if (sum > ntemp) ntemp = sum;
1294       }
1295       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1296     } else {
1297       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1298     }
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNC__
1304 #define __FUNC__ "MatTranspose_MPIAIJ"
1305 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1306 {
1307   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1308   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1309   int        ierr,shift = Aloc->indexshift;
1310   int        M = a->M,N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1311   Mat        B;
1312   Scalar     *array;
1313 
1314   PetscFunctionBegin;
1315   if (!matout && M != N) {
1316     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1317   }
1318 
1319   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1320 
1321   /* copy over the A part */
1322   Aloc = (Mat_SeqAIJ*)a->A->data;
1323   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1324   row = a->rstart;
1325   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1326   for (i=0; i<m; i++) {
1327     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1328     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1329   }
1330   aj = Aloc->j;
1331   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1332 
1333   /* copy over the B part */
1334   Aloc = (Mat_SeqAIJ*)a->B->data;
1335   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1336   row = a->rstart;
1337   ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols);
1338   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1339   for (i=0; i<m; i++) {
1340     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1341     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1342   }
1343   ierr = PetscFree(ct);CHKERRQ(ierr);
1344   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1345   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1346   if (matout) {
1347     *matout = B;
1348   } else {
1349     PetscOps       *Abops;
1350     struct _MatOps *Aops;
1351 
1352     /* This isn't really an in-place transpose .... but free data structures from a */
1353     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1354     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1355     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1356 #if defined (PETSC_USE_CTABLE)
1357     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1358 #else
1359     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1360 #endif
1361     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1362     if (a->lvec)   {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
1363     if (a->Mvctx)  {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
1364     ierr = PetscFree(a);CHKERRQ(ierr);
1365 
1366     /*
1367        This is horrible, horrible code. We need to keep the
1368       A pointers for the bops and ops but copy everything
1369       else from C.
1370     */
1371     Abops   = A->bops;
1372     Aops    = A->ops;
1373     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1374     A->bops = Abops;
1375     A->ops  = Aops;
1376     PetscHeaderDestroy(B);
1377   }
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1383 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1384 {
1385   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1386   Mat        a = aij->A,b = aij->B;
1387   int        ierr,s1,s2,s3;
1388 
1389   PetscFunctionBegin;
1390   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1391   if (rr) {
1392     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1393     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1394     /* Overlap communication with computation. */
1395     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1396   }
1397   if (ll) {
1398     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1399     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1400     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1401   }
1402   /* scale  the diagonal block */
1403   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1404 
1405   if (rr) {
1406     /* Do a scatter end and then right scale the off-diagonal block */
1407     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1408     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1409   }
1410 
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 
1415 #undef __FUNC__
1416 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1417 int MatPrintHelp_MPIAIJ(Mat A)
1418 {
1419   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1420   int        ierr;
1421 
1422   PetscFunctionBegin;
1423   if (!a->rank) {
1424     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNC__
1430 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1431 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1432 {
1433   PetscFunctionBegin;
1434   *bs = 1;
1435   PetscFunctionReturn(0);
1436 }
1437 #undef __FUNC__
1438 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1439 int MatSetUnfactored_MPIAIJ(Mat A)
1440 {
1441   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1442   int        ierr;
1443 
1444   PetscFunctionBegin;
1445   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNC__
1450 #define __FUNC__ "MatEqual_MPIAIJ"
1451 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1452 {
1453   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1454   Mat        a,b,c,d;
1455   PetscTruth flg;
1456   int        ierr;
1457 
1458   PetscFunctionBegin;
1459   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1460   a = matA->A; b = matA->B;
1461   c = matB->A; d = matB->B;
1462 
1463   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1464   if (flg == PETSC_TRUE) {
1465     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1466   }
1467   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNC__
1472 #define __FUNC__ "MatCopy_MPIAIJ"
1473 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1474 {
1475   int        ierr;
1476   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1477   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1478 
1479   PetscFunctionBegin;
1480   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1481     /* because of the column compression in the off-processor part of the matrix a->B,
1482        the number of columns in a->B and b->B may be different, hence we cannot call
1483        the MatCopy() directly on the two parts. If need be, we can provide a more
1484        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1485        then copying the submatrices */
1486     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1487   } else {
1488     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1489     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1495 extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1496 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1497 extern int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1498 extern int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1499 
1500 /* -------------------------------------------------------------------*/
1501 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1502        MatGetRow_MPIAIJ,
1503        MatRestoreRow_MPIAIJ,
1504        MatMult_MPIAIJ,
1505        MatMultAdd_MPIAIJ,
1506        MatMultTranspose_MPIAIJ,
1507        MatMultTransposeAdd_MPIAIJ,
1508        0,
1509        0,
1510        0,
1511        0,
1512        0,
1513        0,
1514        MatRelax_MPIAIJ,
1515        MatTranspose_MPIAIJ,
1516        MatGetInfo_MPIAIJ,
1517        MatEqual_MPIAIJ,
1518        MatGetDiagonal_MPIAIJ,
1519        MatDiagonalScale_MPIAIJ,
1520        MatNorm_MPIAIJ,
1521        MatAssemblyBegin_MPIAIJ,
1522        MatAssemblyEnd_MPIAIJ,
1523        0,
1524        MatSetOption_MPIAIJ,
1525        MatZeroEntries_MPIAIJ,
1526        MatZeroRows_MPIAIJ,
1527        0,
1528        0,
1529        0,
1530        0,
1531        MatGetSize_MPIAIJ,
1532        MatGetLocalSize_MPIAIJ,
1533        MatGetOwnershipRange_MPIAIJ,
1534        0,
1535        0,
1536        0,
1537        0,
1538        MatDuplicate_MPIAIJ,
1539        0,
1540        0,
1541        0,
1542        0,
1543        0,
1544        MatGetSubMatrices_MPIAIJ,
1545        MatIncreaseOverlap_MPIAIJ,
1546        MatGetValues_MPIAIJ,
1547        MatCopy_MPIAIJ,
1548        MatPrintHelp_MPIAIJ,
1549        MatScale_MPIAIJ,
1550        0,
1551        0,
1552        0,
1553        MatGetBlockSize_MPIAIJ,
1554        0,
1555        0,
1556        0,
1557        0,
1558        MatFDColoringCreate_MPIAIJ,
1559        0,
1560        MatSetUnfactored_MPIAIJ,
1561        0,
1562        0,
1563        MatGetSubMatrix_MPIAIJ,
1564        0,
1565        0,
1566        MatGetMaps_Petsc};
1567 
1568 /* ----------------------------------------------------------------------------------------*/
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNC__
1572 #define __FUNC__ "MatStoreValues_MPIAIJ"
1573 int MatStoreValues_MPIAIJ(Mat mat)
1574 {
1575   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1576   int        ierr;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1580   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 EXTERN_C_END
1584 
1585 EXTERN_C_BEGIN
1586 #undef __FUNC__
1587 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1588 int MatRetrieveValues_MPIAIJ(Mat mat)
1589 {
1590   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1591   int        ierr;
1592 
1593   PetscFunctionBegin;
1594   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1595   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 EXTERN_C_END
1599 
1600 #include "pc.h"
1601 EXTERN_C_BEGIN
1602 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1603 EXTERN_C_END
1604 
1605 #undef __FUNC__
1606 #define __FUNC__ "MatCreateMPIAIJ"
1607 /*@C
1608    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1609    (the default parallel PETSc format).  For good matrix assembly performance
1610    the user should preallocate the matrix storage by setting the parameters
1611    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1612    performance can be increased by more than a factor of 50.
1613 
1614    Collective on MPI_Comm
1615 
1616    Input Parameters:
1617 +  comm - MPI communicator
1618 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1619            This value should be the same as the local size used in creating the
1620            y vector for the matrix-vector product y = Ax.
1621 .  n - This value should be the same as the local size used in creating the
1622        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1623        calculated if N is given) For square matrices n is almost always m.
1624 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1625 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1626 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1627            (same value is used for all local rows)
1628 .  d_nnz - array containing the number of nonzeros in the various rows of the
1629            DIAGONAL portion of the local submatrix (possibly different for each row)
1630            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1631            The size of this array is equal to the number of local rows, i.e 'm'.
1632            You must leave room for the diagonal entry even if it is zero.
1633 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1634            submatrix (same value is used for all local rows).
1635 -  o_nnz - array containing the number of nonzeros in the various rows of the
1636            OFF-DIAGONAL portion of the local submatrix (possibly different for
1637            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1638            structure. The size of this array is equal to the number
1639            of local rows, i.e 'm'.
1640 
1641    Output Parameter:
1642 .  A - the matrix
1643 
1644    Notes:
1645    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1646    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1647    storage requirements for this matrix.
1648 
1649    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1650    processor than it must be used on all processors that share the object for
1651    that argument.
1652 
1653    The AIJ format (also called the Yale sparse matrix format or
1654    compressed row storage), is fully compatible with standard Fortran 77
1655    storage.  That is, the stored row and column indices can begin at
1656    either one (as in Fortran) or zero.  See the users manual for details.
1657 
1658    The user MUST specify either the local or global matrix dimensions
1659    (possibly both).
1660 
1661    The parallel matrix is partitioned such that the first m0 rows belong to
1662    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1663    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1664 
1665    The DIAGONAL portion of the local submatrix of a processor can be defined
1666    as the submatrix which is obtained by extraction the part corresponding
1667    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1668    first row that belongs to the processor, and r2 is the last row belonging
1669    to the this processor. This is a square mxm matrix. The remaining portion
1670    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1671 
1672    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1673 
1674    By default, this format uses inodes (identical nodes) when possible.
1675    We search for consecutive rows with the same nonzero structure, thereby
1676    reusing matrix information to achieve increased efficiency.
1677 
1678    Options Database Keys:
1679 +  -mat_aij_no_inode  - Do not use inodes
1680 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1681 -  -mat_aij_oneindex - Internally use indexing starting at 1
1682         rather than 0.  Note that when calling MatSetValues(),
1683         the user still MUST index entries starting at 0!
1684 .   -mat_mpi - use the parallel matrix data structures even on one processor
1685                (defaults to using SeqBAIJ format on one processor)
1686 .   -mat_mpi - use the parallel matrix data structures even on one processor
1687                (defaults to using SeqAIJ format on one processor)
1688 
1689 
1690    Example usage:
1691 
1692    Consider the following 8x8 matrix with 34 non-zero values, that is
1693    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1694    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1695    as follows:
1696 
1697 .vb
1698             1  2  0  |  0  3  0  |  0  4
1699     Proc0   0  5  6  |  7  0  0  |  8  0
1700             9  0 10  | 11  0  0  | 12  0
1701     -------------------------------------
1702            13  0 14  | 15 16 17  |  0  0
1703     Proc1   0 18  0  | 19 20 21  |  0  0
1704             0  0  0  | 22 23  0  | 24  0
1705     -------------------------------------
1706     Proc2  25 26 27  |  0  0 28  | 29  0
1707            30  0  0  | 31 32 33  |  0 34
1708 .ve
1709 
1710    This can be represented as a collection of submatrices as:
1711 
1712 .vb
1713       A B C
1714       D E F
1715       G H I
1716 .ve
1717 
1718    Where the submatrices A,B,C are owned by proc0, D,E,F are
1719    owned by proc1, G,H,I are owned by proc2.
1720 
1721    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1722    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1723    The 'M','N' parameters are 8,8, and have the same values on all procs.
1724 
1725    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1726    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1727    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1728    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1729    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1730    matrix, ans [DF] as another SeqAIJ matrix.
1731 
1732    When d_nz, o_nz parameters are specified, d_nz storage elements are
1733    allocated for every row of the local diagonal submatrix, and o_nz
1734    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1735    One way to choose d_nz and o_nz is to use the max nonzerors per local
1736    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1737    In this case, the values of d_nz,o_nz are:
1738 .vb
1739      proc0 : dnz = 2, o_nz = 2
1740      proc1 : dnz = 3, o_nz = 2
1741      proc2 : dnz = 1, o_nz = 4
1742 .ve
1743    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1744    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1745    for proc3. i.e we are using 12+15+10=37 storage locations to store
1746    34 values.
1747 
1748    When d_nnz, o_nnz parameters are specified, the storage is specified
1749    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1750    In the above case the values for d_nnz,o_nnz are:
1751 .vb
1752      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1753      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1754      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1755 .ve
1756    Here the space allocated is sum of all the above values i.e 34, and
1757    hence pre-allocation is perfect.
1758 
1759    Level: intermediate
1760 
1761 .keywords: matrix, aij, compressed row, sparse, parallel
1762 
1763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1764 @*/
1765 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1766 {
1767   Mat          B;
1768   Mat_MPIAIJ   *b;
1769   int          ierr,i,size;
1770   PetscTruth   flag1,flag2;
1771 
1772   PetscFunctionBegin;
1773 
1774   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1775   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1776   if (d_nnz) {
1777     for (i=0; i<m; i++) {
1778       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1779     }
1780   }
1781   if (o_nnz) {
1782     for (i=0; i<m; i++) {
1783       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1784     }
1785   }
1786 
1787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1788   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1789   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1790   if (!flag1 && !flag2 && size == 1) {
1791     if (M == PETSC_DECIDE) M = m;
1792     if (N == PETSC_DECIDE) N = n;
1793     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1794     PetscFunctionReturn(0);
1795   }
1796 
1797   *A = 0;
1798   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1799   PLogObjectCreate(B);
1800   B->data         = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1801   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1802   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1803   B->ops->destroy = MatDestroy_MPIAIJ;
1804   B->ops->view    = MatView_MPIAIJ;
1805   B->factor       = 0;
1806   B->assembled    = PETSC_FALSE;
1807   B->mapping      = 0;
1808 
1809   B->insertmode      = NOT_SET_VALUES;
1810   b->size            = size;
1811   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1812 
1813   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
1814     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1815   }
1816 
1817   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1818   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1819   b->m = m; B->m = m;
1820   b->n = n; B->n = n;
1821   b->N = N; B->N = N;
1822   b->M = M; B->M = M;
1823 
1824   /* the information in the maps duplicates the information computed below, eventually
1825      we should remove the duplicate information that is not contained in the maps */
1826   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1827   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1828 
1829   /* build local table of row and column ownerships */
1830   b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1831   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1832   b->cowners = b->rowners + b->size + 2;
1833   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1834   b->rowners[0] = 0;
1835   for (i=2; i<=b->size; i++) {
1836     b->rowners[i] += b->rowners[i-1];
1837   }
1838   b->rstart = b->rowners[b->rank];
1839   b->rend   = b->rowners[b->rank+1];
1840   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1841   b->cowners[0] = 0;
1842   for (i=2; i<=b->size; i++) {
1843     b->cowners[i] += b->cowners[i-1];
1844   }
1845   b->cstart = b->cowners[b->rank];
1846   b->cend   = b->cowners[b->rank+1];
1847 
1848   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1849   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1850   PLogObjectParent(B,b->A);
1851   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1852   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1853   PLogObjectParent(B,b->B);
1854 
1855   /* build cache for off array entries formed */
1856   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1857   b->donotstash  = PETSC_FALSE;
1858   b->colmap      = 0;
1859   b->garray      = 0;
1860   b->roworiented = PETSC_TRUE;
1861 
1862   /* stuff used for matrix vector multiply */
1863   b->lvec      = PETSC_NULL;
1864   b->Mvctx     = PETSC_NULL;
1865 
1866   /* stuff for MatGetRow() */
1867   b->rowindices   = 0;
1868   b->rowvalues    = 0;
1869   b->getrowactive = PETSC_FALSE;
1870 
1871   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1872                                      "MatStoreValues_MPIAIJ",
1873                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1874   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1875                                      "MatRetrieveValues_MPIAIJ",
1876                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1877   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1878                                      "MatGetDiagonalBlock_MPIAIJ",
1879                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1880   *A = B;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNC__
1885 #define __FUNC__ "MatDuplicate_MPIAIJ"
1886 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1887 {
1888   Mat        mat;
1889   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1890   int        ierr,len = 0;
1891   PetscTruth flg;
1892 
1893   PetscFunctionBegin;
1894   *newmat       = 0;
1895   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1896   PLogObjectCreate(mat);
1897   mat->data         = (void*)(a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1898   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1899   mat->ops->destroy = MatDestroy_MPIAIJ;
1900   mat->ops->view    = MatView_MPIAIJ;
1901   mat->factor       = matin->factor;
1902   mat->assembled    = PETSC_TRUE;
1903   mat->insertmode   = NOT_SET_VALUES;
1904 
1905   a->m = mat->m   = oldmat->m;
1906   a->n = mat->n   = oldmat->n;
1907   a->M = mat->M   = oldmat->M;
1908   a->N = mat->N   = oldmat->N;
1909 
1910   a->rstart       = oldmat->rstart;
1911   a->rend         = oldmat->rend;
1912   a->cstart       = oldmat->cstart;
1913   a->cend         = oldmat->cend;
1914   a->size         = oldmat->size;
1915   a->rank         = oldmat->rank;
1916   a->donotstash   = oldmat->donotstash;
1917   a->roworiented  = oldmat->roworiented;
1918   a->rowindices   = 0;
1919   a->rowvalues    = 0;
1920   a->getrowactive = PETSC_FALSE;
1921 
1922   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1923   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1924   a->cowners = a->rowners + a->size + 2;
1925   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1926   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1927   if (oldmat->colmap) {
1928 #if defined (PETSC_USE_CTABLE)
1929     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1930 #else
1931     a->colmap = (int*)PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1932     PLogObjectMemory(mat,(a->N)*sizeof(int));
1933     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1934 #endif
1935   } else a->colmap = 0;
1936   if (oldmat->garray) {
1937     len = ((Mat_SeqAIJ*)(oldmat->B->data))->n;
1938     a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1939     PLogObjectMemory(mat,len*sizeof(int));
1940     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1941   } else a->garray = 0;
1942 
1943   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1944   PLogObjectParent(mat,a->lvec);
1945   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1946   PLogObjectParent(mat,a->Mvctx);
1947   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1948   PLogObjectParent(mat,a->A);
1949   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1950   PLogObjectParent(mat,a->B);
1951   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1952   if (flg) {
1953     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1954   }
1955   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1956   *newmat = mat;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "sys.h"
1961 
1962 #undef __FUNC__
1963 #define __FUNC__ "MatLoad_MPIAIJ"
1964 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1965 {
1966   Mat          A;
1967   Scalar       *vals,*svals;
1968   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1969   MPI_Status   status;
1970   int          i,nz,ierr,j,rstart,rend,fd;
1971   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1972   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1973   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1974 
1975   PetscFunctionBegin;
1976   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1977   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1978   if (!rank) {
1979     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1980     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1981     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1982     if (header[3] < 0) {
1983       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1984     }
1985   }
1986 
1987   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1988   M = header[1]; N = header[2];
1989   /* determine ownership of all rows */
1990   m = M/size + ((M % size) > rank);
1991   rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1992   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1993   rowners[0] = 0;
1994   for (i=2; i<=size; i++) {
1995     rowners[i] += rowners[i-1];
1996   }
1997   rstart = rowners[rank];
1998   rend   = rowners[rank+1];
1999 
2000   /* distribute row lengths to all processors */
2001   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
2002   offlens = ourlens + (rend-rstart);
2003   if (!rank) {
2004     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
2005     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2006     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2007     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2008     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2009     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2010   } else {
2011     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2012   }
2013 
2014   if (!rank) {
2015     /* calculate the number of nonzeros on each processor */
2016     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2017     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2018     for (i=0; i<size; i++) {
2019       for (j=rowners[i]; j< rowners[i+1]; j++) {
2020         procsnz[i] += rowlengths[j];
2021       }
2022     }
2023     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2024 
2025     /* determine max buffer needed and allocate it */
2026     maxnz = 0;
2027     for (i=0; i<size; i++) {
2028       maxnz = PetscMax(maxnz,procsnz[i]);
2029     }
2030     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2031 
2032     /* read in my part of the matrix column indices  */
2033     nz     = procsnz[0];
2034     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
2035     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2036 
2037     /* read in every one elses and ship off */
2038     for (i=1; i<size; i++) {
2039       nz   = procsnz[i];
2040       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2041       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2042     }
2043     ierr = PetscFree(cols);CHKERRQ(ierr);
2044   } else {
2045     /* determine buffer space needed for message */
2046     nz = 0;
2047     for (i=0; i<m; i++) {
2048       nz += ourlens[i];
2049     }
2050     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
2051 
2052     /* receive message of column indices*/
2053     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2054     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2055     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2056   }
2057 
2058   /* determine column ownership if matrix is not square */
2059   if (N != M) {
2060     n      = N/size + ((N % size) > rank);
2061     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2062     cstart = cend - n;
2063   } else {
2064     cstart = rstart;
2065     cend   = rend;
2066     n      = cend - cstart;
2067   }
2068 
2069   /* loop over local rows, determining number of off diagonal entries */
2070   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2071   jj = 0;
2072   for (i=0; i<m; i++) {
2073     for (j=0; j<ourlens[i]; j++) {
2074       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2075       jj++;
2076     }
2077   }
2078 
2079   /* create our matrix */
2080   for (i=0; i<m; i++) {
2081     ourlens[i] -= offlens[i];
2082   }
2083   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2084   A = *newmat;
2085   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2086   for (i=0; i<m; i++) {
2087     ourlens[i] += offlens[i];
2088   }
2089 
2090   if (!rank) {
2091     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
2092 
2093     /* read in my part of the matrix numerical values  */
2094     nz   = procsnz[0];
2095     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2096 
2097     /* insert into matrix */
2098     jj      = rstart;
2099     smycols = mycols;
2100     svals   = vals;
2101     for (i=0; i<m; i++) {
2102       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2103       smycols += ourlens[i];
2104       svals   += ourlens[i];
2105       jj++;
2106     }
2107 
2108     /* read in other processors and ship out */
2109     for (i=1; i<size; i++) {
2110       nz   = procsnz[i];
2111       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2112       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2113     }
2114     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2115   } else {
2116     /* receive numeric values */
2117     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
2118 
2119     /* receive message of values*/
2120     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2121     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2122     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2123 
2124     /* insert into matrix */
2125     jj      = rstart;
2126     smycols = mycols;
2127     svals   = vals;
2128     for (i=0; i<m; i++) {
2129       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2130       smycols += ourlens[i];
2131       svals   += ourlens[i];
2132       jj++;
2133     }
2134   }
2135   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2136   ierr = PetscFree(vals);CHKERRQ(ierr);
2137   ierr = PetscFree(mycols);CHKERRQ(ierr);
2138   ierr = PetscFree(rowners);CHKERRQ(ierr);
2139 
2140   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2141   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNC__
2146 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2147 /*
2148     Not great since it makes two copies of the submatrix, first an SeqAIJ
2149   in local and then by concatenating the local matrices the end result.
2150   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2151 */
2152 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2153 {
2154   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2155   Mat        *local,M,Mreuse;
2156   Scalar     *vwork,*aa;
2157   MPI_Comm   comm = mat->comm;
2158   Mat_SeqAIJ *aij;
2159   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
2160 
2161   PetscFunctionBegin;
2162   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2163   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2164 
2165   if (call ==  MAT_REUSE_MATRIX) {
2166     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2167     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before,cannot reuse");
2168     local = &Mreuse;
2169     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2170   } else {
2171     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2172     Mreuse = *local;
2173     ierr = PetscFree(local);CHKERRQ(ierr);
2174   }
2175 
2176   /*
2177       m - number of local rows
2178       n - number of columns (same on all processors)
2179       rstart - first row in new global matrix generated
2180   */
2181   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2182   if (call == MAT_INITIAL_MATRIX) {
2183     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2184     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2185     ii  = aij->i;
2186     jj  = aij->j;
2187 
2188     /*
2189         Determine the number of non-zeros in the diagonal and off-diagonal
2190         portions of the matrix in order to do correct preallocation
2191     */
2192 
2193     /* first get start and end of "diagonal" columns */
2194     if (csize == PETSC_DECIDE) {
2195       nlocal = n/size + ((n % size) > rank);
2196     } else {
2197       nlocal = csize;
2198     }
2199     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2200     rstart = rend - nlocal;
2201     if (rank == size - 1 && rend != n) {
2202       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2203     }
2204 
2205     /* next, compute all the lengths */
2206     dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
2207     olens = dlens + m;
2208     for (i=0; i<m; i++) {
2209       jend = ii[i+1] - ii[i];
2210       olen = 0;
2211       dlen = 0;
2212       for (j=0; j<jend; j++) {
2213         if (*jj < rstart || *jj >= rend) olen++;
2214         else dlen++;
2215         jj++;
2216       }
2217       olens[i] = olen;
2218       dlens[i] = dlen;
2219     }
2220     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2221     ierr = PetscFree(dlens);CHKERRQ(ierr);
2222   } else {
2223     int ml,nl;
2224 
2225     M = *newmat;
2226     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2227     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2228     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2229     /*
2230          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2231        rather than the slower MatSetValues().
2232     */
2233     M->was_assembled = PETSC_TRUE;
2234     M->assembled     = PETSC_FALSE;
2235   }
2236   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2237   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2238   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2239   ii  = aij->i;
2240   jj  = aij->j;
2241   aa  = aij->a;
2242   for (i=0; i<m; i++) {
2243     row   = rstart + i;
2244     nz    = ii[i+1] - ii[i];
2245     cwork = jj;     jj += nz;
2246     vwork = aa;     aa += nz;
2247     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2248   }
2249 
2250   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252   *newmat = M;
2253 
2254   /* save submatrix used in processor for next request */
2255   if (call ==  MAT_INITIAL_MATRIX) {
2256     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2257     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2258   }
2259 
2260   PetscFunctionReturn(0);
2261 }
2262