xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6831982abb6453c2f3e25efecb5d0951d333371e)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.304 1999/10/13 20:37:20 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /*
17   Local utility routine that creates a mapping from the global column
18 number to the local number in the off-diagonal part of the local
19 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
20 a slightly higher hash table cost; without it, it is not scalable (each processor
21 has an order N integer array) but access is fast.
22 */
23 #undef __FUNC__
24 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
25 int CreateColmap_MPIAIJ_Private(Mat mat)
26 {
27   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
28   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
29   int        n = B->n,i,ierr;
30 
31   PetscFunctionBegin;
32 #if defined (PETSC_USE_CTABLE)
33   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
34   for ( i=0; i<n; i++ ){
35     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
36   }
37 #else
38   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
39   PLogObjectMemory(mat,aij->N*sizeof(int));
40   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
41   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
42 #endif
43   PetscFunctionReturn(0);
44 }
45 
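/*
   Illustrative sketch (not part of the library): once colmap has been built,
   a global column number gc that lies in the off-diagonal part is translated
   to a local column index the same way MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below do it; the names gc and lc are assumptions of
   this sketch.

     int lc;                                  local column, -1 if not present
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gc+1,&lc);CHKERRQ(ierr);
     lc--;                                    entries are stored shifted by one
   #else
     lc = aij->colmap[gc] - 1;                a zero entry means "not present"
   #endif
*/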
46 #define CHUNKSIZE   15
47 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
48 { \
49  \
50     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
51     rmax = aimax[row]; nrow = ailen[row];  \
52     col1 = col - shift; \
53      \
54     low = 0; high = nrow; \
55     while (high-low > 5) { \
56       t = (low+high)/2; \
57       if (rp[t] > col) high = t; \
58       else             low  = t; \
59     } \
60       for ( _i=0; _i<nrow; _i++ ) { \
61         if (rp[_i] > col1) break; \
62         if (rp[_i] == col1) { \
63           if (addv == ADD_VALUES) ap[_i] += value;   \
64           else                  ap[_i] = value; \
65           goto a_noinsert; \
66         } \
67       }  \
68       if (nonew == 1) goto a_noinsert; \
69       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
70       if (nrow >= rmax) { \
71         /* there is no extra room in row, therefore enlarge */ \
72         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
73         Scalar *new_a; \
74  \
75         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
76  \
77         /* malloc new storage space */ \
78         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
79         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
80         new_j   = (int *) (new_a + new_nz); \
81         new_i   = new_j + new_nz; \
82  \
83         /* copy over old data into new slots */ \
84         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
85         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
86         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
87         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
88         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
89                                                            len*sizeof(int));CHKERRQ(ierr); \
90         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
91         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
92                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
93         /* free up old matrix storage */ \
94  \
95         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
96         if (!a->singlemalloc) { \
97            ierr = PetscFree(a->i);CHKERRQ(ierr); \
98            ierr = PetscFree(a->j);CHKERRQ(ierr); \
99         } \
100         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
101         a->singlemalloc = 1; \
102  \
103         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
104         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
105         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
106         a->maxnz += CHUNKSIZE; \
107         a->reallocs++; \
108       } \
109       N = nrow++ - 1; a->nz++; \
110       /* shift up all the later entries in this row */ \
111       for ( ii=N; ii>=_i; ii-- ) { \
112         rp[ii+1] = rp[ii]; \
113         ap[ii+1] = ap[ii]; \
114       } \
115       rp[_i] = col1;  \
116       ap[_i] = value;  \
117       a_noinsert: ; \
118       ailen[row] = nrow; \
119 }
120 
121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
122 { \
123  \
124     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
125     rmax = bimax[row]; nrow = bilen[row];  \
126     col1 = col - shift; \
127      \
128     low = 0; high = nrow; \
129     while (high-low > 5) { \
130       t = (low+high)/2; \
131       if (rp[t] > col) high = t; \
132       else             low  = t; \
133     } \
134        for ( _i=0; _i<nrow; _i++ ) { \
135         if (rp[_i] > col1) break; \
136         if (rp[_i] == col1) { \
137           if (addv == ADD_VALUES) ap[_i] += value;   \
138           else                  ap[_i] = value; \
139           goto b_noinsert; \
140         } \
141       }  \
142       if (nonew == 1) goto b_noinsert; \
143       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
144       if (nrow >= rmax) { \
145         /* there is no extra room in row, therefore enlarge */ \
146         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
147         Scalar *new_a; \
148  \
149         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
150  \
151         /* malloc new storage space */ \
152         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
153         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
154         new_j   = (int *) (new_a + new_nz); \
155         new_i   = new_j + new_nz; \
156  \
157         /* copy over old data into new slots */ \
158         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
159         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
160         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
161         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
162         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
163                                                            len*sizeof(int));CHKERRQ(ierr); \
164         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
165         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
166                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
167         /* free up old matrix storage */ \
168  \
169         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
170         if (!b->singlemalloc) { \
171           ierr = PetscFree(b->i);CHKERRQ(ierr); \
172           ierr = PetscFree(b->j);CHKERRQ(ierr); \
173         } \
174         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
175         b->singlemalloc = 1; \
176  \
177         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
178         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
179         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
180         b->maxnz += CHUNKSIZE; \
181         b->reallocs++; \
182       } \
183       N = nrow++ - 1; b->nz++; \
184       /* shift up all the later entries in this row */ \
185       for ( ii=N; ii>=_i; ii-- ) { \
186         rp[ii+1] = rp[ii]; \
187         ap[ii+1] = ap[ii]; \
188       } \
189       rp[_i] = col1;  \
190       ap[_i] = value;  \
191       b_noinsert: ; \
192       bilen[row] = nrow; \
193 }
194 
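/*
   Notes on the two macros above (descriptive only): each inserts value at
   position (row,col) of one sequential block (A = diagonal part, B =
   off-diagonal part).  rp/ap point at the column indices and values of the
   row, nrow is the number of stored entries and rmax the number allocated
   for that row.  A short binary search narrows the range, a linear scan
   locates the column; if the column is absent and new nonzeros are allowed,
   the row arrays are reallocated with CHUNKSIZE extra slots and the later
   entries of the row are shifted up to make room.  The shift variable
   accounts for possible 1-based indexing (indexshift).
*/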
195 #undef __FUNC__
196 #define __FUNC__ "MatSetValues_MPIAIJ"
197 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
198 {
199   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
200   Scalar     value;
201   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
202   int        cstart = aij->cstart, cend = aij->cend,row,col;
203   int        roworiented = aij->roworiented;
204 
205   /* Some variables required by the macros above */
206   Mat        A = aij->A;
207   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
208   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
209   Scalar     *aa = a->a;
210 
211   Mat        B = aij->B;
212   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
213   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
214   Scalar     *ba = b->a;
215 
216   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
217   int        nonew = a->nonew,shift = a->indexshift;
218   Scalar     *ap;
219 
220   PetscFunctionBegin;
221   for ( i=0; i<m; i++ ) {
222     if (im[i] < 0) continue;
223 #if defined(PETSC_USE_BOPT_g)
224     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
225 #endif
226     if (im[i] >= rstart && im[i] < rend) {
227       row = im[i] - rstart;
228       for ( j=0; j<n; j++ ) {
229         if (in[j] >= cstart && in[j] < cend){
230           col = in[j] - cstart;
231           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
232           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         }
235         else if (in[j] < 0) continue;
236 #if defined(PETSC_USE_BOPT_g)
237         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
238 #endif
239         else {
240           if (mat->was_assembled) {
241             if (!aij->colmap) {
242               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
243             }
244 #if defined (PETSC_USE_CTABLE)
245             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
246 	    col--;
247 #else
248             col = aij->colmap[in[j]] - 1;
249 #endif
250             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
251               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
252               col =  in[j];
253               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
254               B = aij->B;
255               b = (Mat_SeqAIJ *) B->data;
256               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
257               ba = b->a;
258             }
259           } else col = in[j];
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
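/*
   Illustrative usage sketch (assumed driver code, not part of this file):
   values whose rows are owned locally go straight into aij->A or aij->B
   above, while rows owned elsewhere are placed in the stash and communicated
   during assembly.  comm, M and N are assumptions of the sketch.

     Mat    C;
     int    row = 0, col = 0;
     Scalar v   = 1.0;
     ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,
                            5,PETSC_NULL,&C);CHKERRQ(ierr);
     ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/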
278 #undef __FUNC__
279 #define __FUNC__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
283   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
284   int        cstart = aij->cstart, cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for ( i=0; i<m; i++ ) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
289     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for ( j=0; j<n; j++ ) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
294         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNC__
322 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNC__
349 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   Scalar      *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for ( i=0; i<n; ) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled; if so we must
381      also disassemble ourselves, in order that we may reassemble. */
382   /*
383      if the nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled, and thus we can skip this step
385   */
386   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNC__
407 #define __FUNC__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
424   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433 
434   PetscFunctionBegin;
435   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   procs  = nprocs + size;
442   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
443   for ( i=0; i<N; i++ ) {
444     idx = rows[i];
445     found = 0;
446     for ( j=0; j<size; j++ ) {
447       if (idx >= owners[j] && idx < owners[j+1]) {
448         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
449       }
450     }
451     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
452   }
453   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
454 
455   /* inform other processors of number of messages and max length*/
456   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
457   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
458   nrecvs = work[size+rank];
459   nmax   = work[rank];
460   ierr   = PetscFree(work);CHKERRQ(ierr);
461 
462   /* post receives:   */
463   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
464   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
465   for ( i=0; i<nrecvs; i++ ) {
466     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
467   }
468 
469   /* do sends:
470       1) starts[i] gives the starting index in svalues for stuff going to
471          the ith processor
472   */
473   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
474   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
475   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
476   starts[0] = 0;
477   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
478   for ( i=0; i<N; i++ ) {
479     svalues[starts[owner[i]]++] = rows[i];
480   }
481   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
482 
483   starts[0] = 0;
484   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
485   count = 0;
486   for ( i=0; i<size; i++ ) {
487     if (procs[i]) {
488       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
489     }
490   }
491   ierr = PetscFree(starts);CHKERRQ(ierr);
492 
493   base = owners[rank];
494 
495   /*  wait on receives */
496   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
497   source = lens + nrecvs;
498   count  = nrecvs; slen = 0;
499   while (count) {
500     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
501     /* unpack receives into our local space */
502     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
503     source[imdex]  = recv_status.MPI_SOURCE;
504     lens[imdex]    = n;
505     slen          += n;
506     count--;
507   }
508   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
509 
510   /* move the data into the send scatter */
511   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
512   count = 0;
513   for ( i=0; i<nrecvs; i++ ) {
514     values = rvalues + i*nmax;
515     for ( j=0; j<lens[i]; j++ ) {
516       lrows[count++] = values[j] - base;
517     }
518   }
519   ierr = PetscFree(rvalues);CHKERRQ(ierr);
520   ierr = PetscFree(lens);CHKERRQ(ierr);
521   ierr = PetscFree(owner);CHKERRQ(ierr);
522   ierr = PetscFree(nprocs);CHKERRQ(ierr);
523 
524   /* actually zap the local rows */
525   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
526   PLogObjectParent(A,istmp);
527 
528   /*
529         Zero the required rows. If the "diagonal block" of the matrix
530      is square and the user wishes to set the diagonal we use separate
531      code so that MatSetValues() is not called for each diagonal entry,
532      which would allocate new memory, causing many mallocs and slowing things down.
533 
534        Contributed by: Mathew Knepley
535   */
536   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
537   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
538   if (diag && (l->A->M == l->A->N)) {
539     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
540   } else if (diag) {
541     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
542     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
543       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
544 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
545     }
546     for ( i = 0; i < slen; i++ ) {
547       row  = lrows[i] + rstart;
548       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
549     }
550     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552   } else {
553     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
554   }
555   ierr = ISDestroy(istmp);CHKERRQ(ierr);
556   ierr = PetscFree(lrows);CHKERRQ(ierr);
557 
558   /* wait on sends */
559   if (nsends) {
560     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
561     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
562     ierr        = PetscFree(send_status);CHKERRQ(ierr);
563   }
564   ierr = PetscFree(send_waits);CHKERRQ(ierr);
565   ierr = PetscFree(svalues);CHKERRQ(ierr);
566 
567   PetscFunctionReturn(0);
568 }
569 
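/*
   Illustrative usage sketch (assumed caller, not part of this file): the rows
   are given by global index in an IS; each processor may list rows owned by
   any processor, and the routine above routes them to their owners before
   zeroing.  The matrix C and the row numbers are assumptions of the sketch.

     IS     zrows;
     int    rows[2] = {0,5};
     Scalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&zrows);CHKERRQ(ierr);
     ierr = MatZeroRows(C,zrows,&one);CHKERRQ(ierr);    put 1.0 on the diagonal
     ierr = ISDestroy(zrows);CHKERRQ(ierr);
*/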
570 #undef __FUNC__
571 #define __FUNC__ "MatMult_MPIAIJ"
572 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
573 {
574   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
575   int        ierr,nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != a->n) {
580     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
581   }
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
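/*
   Descriptive note on the multiply above: in block form it computes

        y = A_d * x_local + B_o * x_ghost

   where A_d (a->A) is the diagonal block, B_o (a->B) the off-diagonal block
   stored with compressed column numbers, and x_ghost (a->lvec) is gathered
   from the other processors by the VecScatter a->Mvctx.  The scatter is
   started before the local product so communication overlaps computation.
*/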
589 #undef __FUNC__
590 #define __FUNC__ "MatMultAdd_MPIAIJ"
591 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
594   int        ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNC__
605 #define __FUNC__ "MatMultTrans_MPIAIJ"
606 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
609   int        ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually */
619   /* inserted in yy until the next line, which is true for my implementation */
620   /* but is perhaps not always true. */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
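/*
   Descriptive note on the transpose multiply above: y = A_d^T * x is formed
   locally, while B_o^T * x is computed into a->lvec and added into the owning
   processors' portions of y by the reverse scatter with ADD_VALUES.
*/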
625 #undef __FUNC__
626 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
627 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
628 {
629   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
630   int        ierr;
631 
632   PetscFunctionBegin;
633   /* do nondiagonal part */
634   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
635   /* send it on its way */
636   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637   /* do local part */
638   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
639   /* receive remote parts: note this assumes the values are not actually */
640   /* inserted in zz until the next line, which is true for my implementation */
641   /* but is perhaps not always true. */
642   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
643   PetscFunctionReturn(0);
644 }
645 
646 /*
647   This only works correctly for square matrices where the subblock A->A is the
648    diagonal block
649 */
650 #undef __FUNC__
651 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
652 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
653 {
654   int        ierr;
655   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
656 
657   PetscFunctionBegin;
658   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
659   if (a->rstart != a->cstart || a->rend != a->cend) {
660     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
661   }
662   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNC__
667 #define __FUNC__ "MatScale_MPIAIJ"
668 int MatScale_MPIAIJ(Scalar *aa,Mat A)
669 {
670   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
671   int        ierr;
672 
673   PetscFunctionBegin;
674   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
675   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
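/*
   Illustrative usage sketch (assumed caller): with this interface the scalar
   is passed by address and both blocks are scaled, so

     Scalar two = 2.0;
     ierr = MatScale(&two,C);CHKERRQ(ierr);

   scales the whole parallel matrix C by 2.0 (C is an assumption of the sketch).
*/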
679 #undef __FUNC__
680 #define __FUNC__ "MatDestroy_MPIAIJ"
681 int MatDestroy_MPIAIJ(Mat mat)
682 {
683   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
684   int        ierr;
685 
686   PetscFunctionBegin;
687 
688   if (mat->mapping) {
689     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
690   }
691   if (mat->bmapping) {
692     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
693   }
694   if (mat->rmap) {
695     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
696   }
697   if (mat->cmap) {
698     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
699   }
700 #if defined(PETSC_USE_LOG)
701   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
702 #endif
703   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
704   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
706   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
707 #if defined (PETSC_USE_CTABLE)
708   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
709 #else
710   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
711 #endif
712   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
713   if (aij->lvec)   VecDestroy(aij->lvec);
714   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
715   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
716   ierr = PetscFree(aij);CHKERRQ(ierr);
717   PLogObjectDestroy(mat);
718   PetscHeaderDestroy(mat);
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNC__
723 #define __FUNC__ "MatView_MPIAIJ_Binary"
724 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
725 {
726   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
727   int         ierr;
728 
729   PetscFunctionBegin;
730   if (aij->size == 1) {
731     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
732   }
733   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNC__
738 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
739 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
740 {
741   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
742   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
743   int         ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size;
744   PetscTruth  isdraw,isascii;
745 
746   PetscFunctionBegin;
747   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
748   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
749   if (isascii) {
750     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
751     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
752       MatInfo info;
753       int     flg;
754       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
756       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
757       if (flg) {
758         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
759 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
760       } else {
761         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
762 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
763       }
764       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
765       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
766       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
767       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
768       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
769       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
770       PetscFunctionReturn(0);
771     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
772       PetscFunctionReturn(0);
773     }
774   } else if (isdraw) {
775     Draw       draw;
776     PetscTruth isnull;
777     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
778     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
779   }
780 
781   if (size == 1) {
782     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
783   } else {
784     /* assemble the entire matrix onto first processor. */
785     Mat         A;
786     Mat_SeqAIJ *Aloc;
787     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
788     Scalar      *a;
789 
790     if (!rank) {
791       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     } else {
793       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
794     }
795     PLogObjectParent(mat,A);
796 
797     /* copy over the A part */
798     Aloc = (Mat_SeqAIJ*) aij->A->data;
799     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
800     row = aij->rstart;
801     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
802     for ( i=0; i<m; i++ ) {
803       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
804       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
805     }
806     aj = Aloc->j;
807     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
808 
809     /* copy over the B part */
810     Aloc = (Mat_SeqAIJ*) aij->B->data;
811     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
812     row = aij->rstart;
813     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
814     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
815     for ( i=0; i<m; i++ ) {
816       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
817       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
818     }
819     ierr = PetscFree(ct);CHKERRQ(ierr);
820     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     /*
823        Everyone has to call to draw the matrix since the graphics waits are
824        synchronized across all processors that share the Draw object
825     */
826     if (!rank) {
827       Viewer sviewer;
828       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
829       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
830       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
831     }
832     ierr = MatDestroy(A);CHKERRQ(ierr);
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNC__
838 #define __FUNC__ "MatView_MPIAIJ"
839 int MatView_MPIAIJ(Mat mat,Viewer viewer)
840 {
841   int        ierr;
842   PetscTruth isascii,isdraw,issocket,isbinary;
843 
844   PetscFunctionBegin;
845   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
846   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
847   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
848   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
849   if (isascii || isdraw || isbinary || issocket) {
850     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
851   } else {
852     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
853   }
854   PetscFunctionReturn(0);
855 }
856 
857 /*
858     This has to provide several versions.
859 
860      2) a) use only local smoothing, updating outer values only once.
861         b) local smoothing, updating outer values each inner iteration
862      3) coloring, updating outer values between colors.
863 */
864 #undef __FUNC__
865 #define __FUNC__ "MatRelax_MPIAIJ"
866 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
867                            double fshift,int its,Vec xx)
868 {
869   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
870   Mat        AA = mat->A, BB = mat->B;
871   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
872   Scalar     *b,*x,*xs,*ls,d,*v,sum;
873   int        ierr,*idx, *diag;
874   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
875 
876   PetscFunctionBegin;
877   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
878   diag = A->diag;
879   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
880     if (flag & SOR_ZERO_INITIAL_GUESS) {
881       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
882       PetscFunctionReturn(0);
883     }
884     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
885     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
886     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
887     if (xx != bb) {
888       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
889     } else {
890       b = x;
891     }
892     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
893     xs = x + shift; /* shift by one for index start of 1 */
894     ls = ls + shift;
895     while (its--) {
896       /* go down through the rows */
897       for ( i=0; i<m; i++ ) {
898         n    = A->i[i+1] - A->i[i];
899 	PLogFlops(4*n+3);
900         idx  = A->j + A->i[i] + shift;
901         v    = A->a + A->i[i] + shift;
902         sum  = b[i];
903         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904         d    = fshift + A->a[diag[i]+shift];
905         n    = B->i[i+1] - B->i[i];
906         idx  = B->j + B->i[i] + shift;
907         v    = B->a + B->i[i] + shift;
908         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
910       }
911       /* come up through the rows */
912       for ( i=m-1; i>-1; i-- ) {
913         n    = A->i[i+1] - A->i[i];
914 	PLogFlops(4*n+3);
915         idx  = A->j + A->i[i] + shift;
916         v    = A->a + A->i[i] + shift;
917         sum  = b[i];
918         SPARSEDENSEMDOT(sum,xs,v,idx,n);
919         d    = fshift + A->a[diag[i]+shift];
920         n    = B->i[i+1] - B->i[i];
921         idx  = B->j + B->i[i] + shift;
922         v    = B->a + B->i[i] + shift;
923         SPARSEDENSEMDOT(sum,ls,v,idx,n);
924         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
925       }
926     }
927     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
928     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
929     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
930   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
931     if (flag & SOR_ZERO_INITIAL_GUESS) {
932       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
933       PetscFunctionReturn(0);
934     }
935     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
936     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
937     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
938     if (xx != bb) {
939       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
940     } else {
941       b = x;
942     }
943     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
944     xs = x + shift; /* shift by one for index start of 1 */
945     ls = ls + shift;
946     while (its--) {
947       for ( i=0; i<m; i++ ) {
948         n    = A->i[i+1] - A->i[i];
949 	PLogFlops(4*n+3);
950         idx  = A->j + A->i[i] + shift;
951         v    = A->a + A->i[i] + shift;
952         sum  = b[i];
953         SPARSEDENSEMDOT(sum,xs,v,idx,n);
954         d    = fshift + A->a[diag[i]+shift];
955         n    = B->i[i+1] - B->i[i];
956         idx  = B->j + B->i[i] + shift;
957         v    = B->a + B->i[i] + shift;
958         SPARSEDENSEMDOT(sum,ls,v,idx,n);
959         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
960       }
961     }
962     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
964     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
965   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
966     if (flag & SOR_ZERO_INITIAL_GUESS) {
967       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     }
970     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
971     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
972     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
973     if (xx != bb) {
974       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
975     } else {
976       b = x;
977     }
978     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
979     xs = x + shift; /* shift by one for index start of 1 */
980     ls = ls + shift;
981     while (its--) {
982       for ( i=m-1; i>-1; i-- ) {
983         n    = A->i[i+1] - A->i[i];
984 	PLogFlops(4*n+3);
985         idx  = A->j + A->i[i] + shift;
986         v    = A->a + A->i[i] + shift;
987         sum  = b[i];
988         SPARSEDENSEMDOT(sum,xs,v,idx,n);
989         d    = fshift + A->a[diag[i]+shift];
990         n    = B->i[i+1] - B->i[i];
991         idx  = B->j + B->i[i] + shift;
992         v    = B->a + B->i[i] + shift;
993         SPARSEDENSEMDOT(sum,ls,v,idx,n);
994         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
995       }
996     }
997     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
998     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
999     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1000   } else {
1001     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
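/*
   Descriptive note on the local sweeps above (assuming SPARSEDENSEMDOT
   subtracts the sparse dot product from sum): each pass applies the standard
   SOR update

     x_i <- (1-omega)*x_i + omega*(b_i - sum_{j!=i} a_ij*x_j)/(a_ii + fshift)

   where the sum over j is split between the diagonal block A (using x itself)
   and the off-diagonal block B (using the ghost values in mat->lvec, which
   are refreshed once per call, not once per inner iteration).
*/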
1006 #undef __FUNC__
1007 #define __FUNC__ "MatGetInfo_MPIAIJ"
1008 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1009 {
1010   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1011   Mat        A = mat->A, B = mat->B;
1012   int        ierr;
1013   double     isend[5], irecv[5];
1014 
1015   PetscFunctionBegin;
1016   info->block_size     = 1.0;
1017   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1018   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1019   isend[3] = info->memory;  isend[4] = info->mallocs;
1020   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1021   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1022   isend[3] += info->memory;  isend[4] += info->mallocs;
1023   if (flag == MAT_LOCAL) {
1024     info->nz_used      = isend[0];
1025     info->nz_allocated = isend[1];
1026     info->nz_unneeded  = isend[2];
1027     info->memory       = isend[3];
1028     info->mallocs      = isend[4];
1029   } else if (flag == MAT_GLOBAL_MAX) {
1030     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1031     info->nz_used      = irecv[0];
1032     info->nz_allocated = irecv[1];
1033     info->nz_unneeded  = irecv[2];
1034     info->memory       = irecv[3];
1035     info->mallocs      = irecv[4];
1036   } else if (flag == MAT_GLOBAL_SUM) {
1037     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1038     info->nz_used      = irecv[0];
1039     info->nz_allocated = irecv[1];
1040     info->nz_unneeded  = irecv[2];
1041     info->memory       = irecv[3];
1042     info->mallocs      = irecv[4];
1043   }
1044   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1045   info->fill_ratio_needed = 0;
1046   info->factor_mallocs    = 0;
1047   info->rows_global       = (double)mat->M;
1048   info->columns_global    = (double)mat->N;
1049   info->rows_local        = (double)mat->m;
1050   info->columns_local     = (double)mat->N;
1051 
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNC__
1056 #define __FUNC__ "MatSetOption_MPIAIJ"
1057 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1058 {
1059   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1060   int        ierr;
1061 
1062   PetscFunctionBegin;
1063   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1064       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1065       op == MAT_COLUMNS_UNSORTED ||
1066       op == MAT_COLUMNS_SORTED ||
1067       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1068       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1069         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1070         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1071   } else if (op == MAT_ROW_ORIENTED) {
1072     a->roworiented = 1;
1073     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1074     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1075   } else if (op == MAT_ROWS_SORTED ||
1076              op == MAT_ROWS_UNSORTED ||
1077              op == MAT_SYMMETRIC ||
1078              op == MAT_STRUCTURALLY_SYMMETRIC ||
1079              op == MAT_YES_NEW_DIAGONALS)
1080     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1081   else if (op == MAT_COLUMN_ORIENTED) {
1082     a->roworiented = 0;
1083     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1084     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1085   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1086     a->donotstash = 1;
1087   } else if (op == MAT_NO_NEW_DIAGONALS){
1088     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1089   } else {
1090     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNC__
1096 #define __FUNC__ "MatGetSize_MPIAIJ"
1097 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1098 {
1099   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1100 
1101   PetscFunctionBegin;
1102   if (m) *m = mat->M;
1103   if (n) *n = mat->N;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNC__
1108 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1109 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1110 {
1111   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1112 
1113   PetscFunctionBegin;
1114   if (m) *m = mat->m;
1115   if (n) *n = mat->n;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1121 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1122 {
1123   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1124 
1125   PetscFunctionBegin;
1126   *m = mat->rstart; *n = mat->rend;
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNC__
1131 #define __FUNC__ "MatGetRow_MPIAIJ"
1132 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1133 {
1134   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1135   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1136   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1137   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1138   int        *cmap, *idx_p;
1139 
1140   PetscFunctionBegin;
1141   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1142   mat->getrowactive = PETSC_TRUE;
1143 
1144   if (!mat->rowvalues && (idx || v)) {
1145     /*
1146         allocate enough space to hold information from the longest row.
1147     */
1148     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1149     int     max = 1,m = mat->m,tmp;
1150     for ( i=0; i<m; i++ ) {
1151       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1152       if (max < tmp) { max = tmp; }
1153     }
1154     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1155     mat->rowindices = (int *) (mat->rowvalues + max);
1156   }
1157 
1158   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1159   lrow = row - rstart;
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for ( i=0; i<nzB; i++ ) {
1176           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1181         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for ( i=0; i<imark; i++ ) {
1187             idx_p[i] = cmap[cworkB[i]];
1188           }
1189         } else {
1190           for ( i=0; i<nzB; i++ ) {
1191             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1192             else break;
1193           }
1194           imark = i;
1195         }
1196         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1197         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1198       }
1199     } else {
1200       if (idx) *idx = 0;
1201       if (v)   *v   = 0;
1202     }
1203   }
1204   *nz = nztot;
1205   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1206   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1212 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1213 {
1214   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1215 
1216   PetscFunctionBegin;
1217   if (aij->getrowactive == PETSC_FALSE) {
1218     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1219   }
1220   aij->getrowactive = PETSC_FALSE;
1221   PetscFunctionReturn(0);
1222 }
1223 
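/*
   Illustrative usage sketch (assumed caller): only locally owned rows may be
   requested, so a typical loop combines MatGetOwnershipRange() with
   MatGetRow()/MatRestoreRow().  The matrix C is an assumption of the sketch.

     int    rstart,rend,row,nz,*cols;
     Scalar *vals;
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(C,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz entries of cols[] and vals[] ...
       ierr = MatRestoreRow(C,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/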
1224 #undef __FUNC__
1225 #define __FUNC__ "MatNorm_MPIAIJ"
1226 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1227 {
1228   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1229   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1230   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1231   double     sum = 0.0;
1232   Scalar     *v;
1233 
1234   PetscFunctionBegin;
1235   if (aij->size == 1) {
1236     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1237   } else {
1238     if (type == NORM_FROBENIUS) {
1239       v = amat->a;
1240       for (i=0; i<amat->nz; i++ ) {
1241 #if defined(PETSC_USE_COMPLEX)
1242         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1243 #else
1244         sum += (*v)*(*v); v++;
1245 #endif
1246       }
1247       v = bmat->a;
1248       for (i=0; i<bmat->nz; i++ ) {
1249 #if defined(PETSC_USE_COMPLEX)
1250         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1251 #else
1252         sum += (*v)*(*v); v++;
1253 #endif
1254       }
1255       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1256       *norm = sqrt(*norm);
1257     } else if (type == NORM_1) { /* max column norm */
1258       double *tmp, *tmp2;
1259       int    *jj, *garray = aij->garray;
1260       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1261       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1262       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1263       *norm = 0.0;
1264       v = amat->a; jj = amat->j;
1265       for ( j=0; j<amat->nz; j++ ) {
1266         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1267       }
1268       v = bmat->a; jj = bmat->j;
1269       for ( j=0; j<bmat->nz; j++ ) {
1270         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1271       }
1272       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1273       for ( j=0; j<aij->N; j++ ) {
1274         if (tmp2[j] > *norm) *norm = tmp2[j];
1275       }
1276       ierr = PetscFree(tmp);CHKERRQ(ierr);
1277       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1278     } else if (type == NORM_INFINITY) { /* max row norm */
1279       double ntemp = 0.0;
1280       for ( j=0; j<amat->m; j++ ) {
1281         v = amat->a + amat->i[j] + shift;
1282         sum = 0.0;
1283         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1284           sum += PetscAbsScalar(*v); v++;
1285         }
1286         v = bmat->a + bmat->i[j] + shift;
1287         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1288           sum += PetscAbsScalar(*v); v++;
1289         }
1290         if (sum > ntemp) ntemp = sum;
1291       }
1292       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1293     } else {
1294       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1295     }
1296   }
1297   PetscFunctionReturn(0);
1298 }
1299 
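/*
   Descriptive note on the norms computed above:

     NORM_FROBENIUS :  sqrt( sum_ij |a_ij|^2 )   (local sums, MPI_SUM reduction)
     NORM_1         :  max_j sum_i |a_ij|        (column sums reduced with
                                                   MPI_SUM, then a local max)
     NORM_INFINITY  :  max_i sum_j |a_ij|        (row sums, MPI_MAX reduction)

   Both the diagonal block aij->A and the off-diagonal block aij->B contribute;
   garray translates the compressed column numbers of B back to global columns
   for the 1-norm.
*/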
1300 #undef __FUNC__
1301 #define __FUNC__ "MatTranspose_MPIAIJ"
1302 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1303 {
1304   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1305   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1306   int        ierr,shift = Aloc->indexshift;
1307   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1308   Mat        B;
1309   Scalar     *array;
1310 
1311   PetscFunctionBegin;
1312   if (matout == PETSC_NULL && M != N) {
1313     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1314   }
1315 
1316   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1317 
1318   /* copy over the A part */
1319   Aloc = (Mat_SeqAIJ*) a->A->data;
1320   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1321   row = a->rstart;
1322   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1323   for ( i=0; i<m; i++ ) {
1324     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1325     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1326   }
1327   aj = Aloc->j;
1328   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1329 
1330   /* copy over the B part */
1331   Aloc = (Mat_SeqAIJ*) a->B->data;
1332   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1333   row = a->rstart;
1334   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1335   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1336   for ( i=0; i<m; i++ ) {
1337     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1338     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1339   }
1340   ierr = PetscFree(ct);CHKERRQ(ierr);
1341   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1342   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1343   if (matout != PETSC_NULL) {
1344     *matout = B;
1345   } else {
1346     PetscOps       *Abops;
1347     struct _MatOps *Aops;
1348 
1349     /* This isn't really an in-place transpose, but we free the data structures from a */
1350     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1351     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1352     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1353 #if defined (PETSC_USE_CTABLE)
1354     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1355 #else
1356     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1357 #endif
1358     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1359     if (a->lvec) VecDestroy(a->lvec);
1360     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1361     ierr = PetscFree(a);CHKERRQ(ierr);
1362 
1363     /*
1364        This is horrible, horrible code. We need to keep the
1365       A pointers for the bops and ops but copy everything
1366       else from B.
1367     */
1368     Abops   = A->bops;
1369     Aops    = A->ops;
1370     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1371     A->bops = Abops;
1372     A->ops  = Aops;
1373     PetscHeaderDestroy(B);
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNC__
1379 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1380 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1381 {
1382   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1383   Mat        a = aij->A, b = aij->B;
1384   int        ierr,s1,s2,s3;
1385 
1386   PetscFunctionBegin;
1387   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1388   if (rr) {
1389     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1390     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1391     /* Overlap communication with computation. */
1392     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1393   }
1394   if (ll) {
1395     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1396     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1397     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1398   }
1399   /* scale  the diagonal block */
1400   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1401 
1402   if (rr) {
1403     /* Do a scatter end and then right scale the off-diagonal block */
1404     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1405     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1406   }
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 
1412 #undef __FUNC__
1413 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1414 int MatPrintHelp_MPIAIJ(Mat A)
1415 {
1416   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1417   int        ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNC__
1427 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1428 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1429 {
1430   PetscFunctionBegin;
1431   *bs = 1;
1432   PetscFunctionReturn(0);
1433 }
1434 #undef __FUNC__
1435 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1436 int MatSetUnfactored_MPIAIJ(Mat A)
1437 {
1438   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1439   int        ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNC__
1447 #define __FUNC__ "MatEqual_MPIAIJ"
1448 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1449 {
1450   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1451   Mat        a, b, c, d;
1452   PetscTruth flg;
1453   int        ierr;
1454 
1455   PetscFunctionBegin;
1456   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNC__
1469 #define __FUNC__ "MatCopy_MPIAIJ"
1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471 {
1472   int        ierr;
1473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475 
1476   PetscFunctionBegin;
1477   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1478     /* Because of the column compression in the off-processor part of the matrix a->B,
1479        the number of columns in a->B and b->B may be different; hence we cannot call
1480        MatCopy() directly on the two parts. If need be, we could provide a more
1481        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1482        and then copying the submatrices. */
1483     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1484   } else {
1485     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1486     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1487   }
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1492 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1493 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1494 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1495 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1496 
1497 /* -------------------------------------------------------------------*/
1498 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1499        MatGetRow_MPIAIJ,
1500        MatRestoreRow_MPIAIJ,
1501        MatMult_MPIAIJ,
1502        MatMultAdd_MPIAIJ,
1503        MatMultTrans_MPIAIJ,
1504        MatMultTransAdd_MPIAIJ,
1505        0,
1506        0,
1507        0,
1508        0,
1509        0,
1510        0,
1511        MatRelax_MPIAIJ,
1512        MatTranspose_MPIAIJ,
1513        MatGetInfo_MPIAIJ,
1514        MatEqual_MPIAIJ,
1515        MatGetDiagonal_MPIAIJ,
1516        MatDiagonalScale_MPIAIJ,
1517        MatNorm_MPIAIJ,
1518        MatAssemblyBegin_MPIAIJ,
1519        MatAssemblyEnd_MPIAIJ,
1520        0,
1521        MatSetOption_MPIAIJ,
1522        MatZeroEntries_MPIAIJ,
1523        MatZeroRows_MPIAIJ,
1524        0,
1525        0,
1526        0,
1527        0,
1528        MatGetSize_MPIAIJ,
1529        MatGetLocalSize_MPIAIJ,
1530        MatGetOwnershipRange_MPIAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535        MatDuplicate_MPIAIJ,
1536        0,
1537        0,
1538        0,
1539        0,
1540        0,
1541        MatGetSubMatrices_MPIAIJ,
1542        MatIncreaseOverlap_MPIAIJ,
1543        MatGetValues_MPIAIJ,
1544        MatCopy_MPIAIJ,
1545        MatPrintHelp_MPIAIJ,
1546        MatScale_MPIAIJ,
1547        0,
1548        0,
1549        0,
1550        MatGetBlockSize_MPIAIJ,
1551        0,
1552        0,
1553        0,
1554        0,
1555        MatFDColoringCreate_MPIAIJ,
1556        0,
1557        MatSetUnfactored_MPIAIJ,
1558        0,
1559        0,
1560        MatGetSubMatrix_MPIAIJ,
1561        0,
1562        0,
1563        MatGetMaps_Petsc};
1564 
1565 /* ----------------------------------------------------------------------------------------*/
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNC__
1569 #define __FUNC__ "MatStoreValues_MPIAIJ"
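/*
   Stores the numerical values of the diagonal (A) and off-diagonal (B) blocks so that
   MatRetrieveValues_MPIAIJ() can restore them later.  A rough sketch of how the public
   interface is typically used (assuming the nonzero pattern is frozen between assemblies;
   see the MatStoreValues() manual page for the exact requirements):

       ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ... refill the matrix with the same nonzero pattern and reassemble ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/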
1570 int MatStoreValues_MPIAIJ(Mat mat)
1571 {
1572   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1573   int        ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1577   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 EXTERN_C_END
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNC__
1584 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1585 int MatRetrieveValues_MPIAIJ(Mat mat)
1586 {
1587   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1588   int        ierr;
1589 
1590   PetscFunctionBegin;
1591   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1592   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 EXTERN_C_END
1596 
1597 #include "pc.h"
1598 EXTERN_C_BEGIN
1599 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1600 EXTERN_C_END
1601 
1602 #undef __FUNC__
1603 #define __FUNC__ "MatCreateMPIAIJ"
1604 /*@C
1605    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1606    (the default parallel PETSc format).  For good matrix assembly performance
1607    the user should preallocate the matrix storage by setting the parameters
1608    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1609    performance can be increased by more than a factor of 50.
1610 
1611    Collective on MPI_Comm
1612 
1613    Input Parameters:
1614 +  comm - MPI communicator
1615 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1616            This value should be the same as the local size used in creating the
1617            y vector for the matrix-vector product y = Ax.
1618 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1619        This value should be the same as the local size used in creating the
1620        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1621 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1622 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1623 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1624            (same value is used for all local rows)
1625 .  d_nnz - array containing the number of nonzeros in the various rows of the
1626            DIAGONAL portion of the local submatrix (possibly different for each row)
1627            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1628            The size of this array is equal to the number of local rows, i.e., 'm'.
1629            You must leave room for the diagonal entry even if it is zero.
1630 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1631            submatrix (same value is used for all local rows).
1632 -  o_nnz - array containing the number of nonzeros in the various rows of the
1633            OFF-DIAGONAL portion of the local submatrix (possibly different for
1634            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1635            structure. The size of this array is equal to the number
1636            of local rows, i.e., 'm'.
1637 
1638    Output Parameter:
1639 .  A - the matrix
1640 
1641    Notes:
1642    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1643    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1644    storage requirements for this matrix.
1645 
1646    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1647    processor then it must be used on all processors that share the object for
1648    that argument.
1649 
1650    The AIJ format (also called the Yale sparse matrix format or
1651    compressed row storage), is fully compatible with standard Fortran 77
1652    storage.  That is, the stored row and column indices can begin at
1653    either one (as in Fortran) or zero.  See the users manual for details.
1654 
1655    The user MUST specify either the local or global matrix dimensions
1656    (possibly both).
1657 
1658    The parallel matrix is partitioned such that the first m0 rows belong to
1659    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1660    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
1661 
1662    The DIAGONAL portion of the local submatrix of a processor can be defined
1663    as the submatrix which is obtained by extracting the part corresponding
1664    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1665    first row that belongs to the processor, and r2 is the last row belonging
1666    to this processor. This is a square mxm matrix. The remaining portion
1667    of the local submatrix (of size m x (N-m)) constitutes the OFF-DIAGONAL portion.
1668 
1669    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1670 
1671    By default, this format uses inodes (identical nodes) when possible.
1672    We search for consecutive rows with the same nonzero structure, thereby
1673    reusing matrix information to achieve increased efficiency.
1674 
1675    Options Database Keys:
1676 +  -mat_aij_no_inode  - Do not use inodes
1677 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1678 .  -mat_aij_oneindex - Internally use indexing starting at 1
1679         rather than 0.  Note that when calling MatSetValues(),
1680         the user still MUST index entries starting at 0!
1681 -  -mat_mpi - use the parallel matrix data structures even on one processor
1682                (defaults to using SeqAIJ format on one processor)
1685 
1686 
1687    Example usage:
1688 
1689    Consider the following 8x8 matrix with 34 non-zero values, that is
1690    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1691    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1692    as follows:
1693 
1694 .vb
1695             1  2  0  |  0  3  0  |  0  4
1696     Proc0   0  5  6  |  7  0  0  |  8  0
1697             9  0 10  | 11  0  0  | 12  0
1698     -------------------------------------
1699            13  0 14  | 15 16 17  |  0  0
1700     Proc1   0 18  0  | 19 20 21  |  0  0
1701             0  0  0  | 22 23  0  | 24  0
1702     -------------------------------------
1703     Proc2  25 26 27  |  0  0 28  | 29  0
1704            30  0  0  | 31 32 33  |  0 34
1705 .ve
1706 
1707    This can be represented as a collection of submatrices as:
1708 
1709 .vb
1710       A B C
1711       D E F
1712       G H I
1713 .ve
1714 
1715    Where the submatrices A,B,C are owned by proc0, D,E,F are
1716    owned by proc1, G,H,I are owned by proc2.
1717 
1718    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1719    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1720    The 'M','N' parameters are 8,8, and have the same values on all procs.
1721 
1722    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1723    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1724    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1725    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1726    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1727    matrix, and [DF] as another SeqAIJ matrix.
1728 
1729    When d_nz, o_nz parameters are specified, d_nz storage elements are
1730    allocated for every row of the local diagonal submatrix, and o_nz
1731    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1732    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1733    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1734    In this case, the values of d_nz,o_nz are:
1735 .vb
1736      proc0 : d_nz = 2, o_nz = 2
1737      proc1 : d_nz = 3, o_nz = 2
1738      proc2 : d_nz = 1, o_nz = 4
1739 .ve
1740    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1741    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1742    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1743    34 values.
1744 
1745    When d_nnz, o_nnz parameters are specified, the storage is specified
1746    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1747    In the above case the values for d_nnz,o_nnz are:
1748 .vb
1749      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1750      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1751      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1752 .ve
1753    Here the space allocated is the sum of all the above values, i.e., 34, and
1754    hence pre-allocation is perfect.
1755 
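   As a minimal calling sketch, proc1 of the example above could create the matrix with
   the d_nnz/o_nnz arrays listed above (error checking and the assembly calls omitted):

.vb
     int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve
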
1756    Level: intermediate
1757 
1758 .keywords: matrix, aij, compressed row, sparse, parallel
1759 
1760 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1761 @*/
1762 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1763 {
1764   Mat          B;
1765   Mat_MPIAIJ   *b;
1766   int          ierr, i,size,flag1 = 0, flag2 = 0;
1767 
1768   PetscFunctionBegin;
1769 
1770   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1771   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1772   if (d_nnz) {
1773     for (i=0; i<m; i++) {
1774       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1775     }
1776   }
1777   if (o_nnz) {
1778     for (i=0; i<m; i++) {
1779       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1780     }
1781   }
1782 
1783   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1784   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1785   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1786   if (!flag1 && !flag2 && size == 1) {
1787     if (M == PETSC_DECIDE) M = m;
1788     if (N == PETSC_DECIDE) N = n;
1789     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1790     PetscFunctionReturn(0);
1791   }
1792 
1793   *A = 0;
1794   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1795   PLogObjectCreate(B);
1796   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1797   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1798   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1799   B->ops->destroy = MatDestroy_MPIAIJ;
1800   B->ops->view    = MatView_MPIAIJ;
1801   B->factor       = 0;
1802   B->assembled    = PETSC_FALSE;
1803   B->mapping      = 0;
1804 
1805   B->insertmode      = NOT_SET_VALUES;
1806   b->size            = size;
1807   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1808 
1809   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1810     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1811   }
1812 
1813   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1814   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1815   b->m = m; B->m = m;
1816   b->n = n; B->n = n;
1817   b->N = N; B->N = N;
1818   b->M = M; B->M = M;
1819 
1820   /* the information in the maps duplicates the information computed below, eventually
1821      we should remove the duplicate information that is not contained in the maps */
1822   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1823   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1824 
1825   /* build local table of row and column ownerships */
1826   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1827   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1828   b->cowners = b->rowners + b->size + 2;
1829   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1830   b->rowners[0] = 0;
1831   for ( i=2; i<=b->size; i++ ) {
1832     b->rowners[i] += b->rowners[i-1];
1833   }
1834   b->rstart = b->rowners[b->rank];
1835   b->rend   = b->rowners[b->rank+1];
1836   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1837   b->cowners[0] = 0;
1838   for ( i=2; i<=b->size; i++ ) {
1839     b->cowners[i] += b->cowners[i-1];
1840   }
1841   b->cstart = b->cowners[b->rank];
1842   b->cend   = b->cowners[b->rank+1];
1843 
1844   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1845   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1846   PLogObjectParent(B,b->A);
1847   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1848   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1849   PLogObjectParent(B,b->B);
1850 
1851   /* build cache (stash) for off-processor entries formed during assembly */
1852   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1853   b->donotstash  = 0;
1854   b->colmap      = 0;
1855   b->garray      = 0;
1856   b->roworiented = 1;
1857 
1858   /* stuff used for matrix vector multiply */
1859   b->lvec      = 0;
1860   b->Mvctx     = 0;
1861 
1862   /* stuff for MatGetRow() */
1863   b->rowindices   = 0;
1864   b->rowvalues    = 0;
1865   b->getrowactive = PETSC_FALSE;
1866 
1867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1868                                      "MatStoreValues_MPIAIJ",
1869                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1870   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1871                                      "MatRetrieveValues_MPIAIJ",
1872                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1874                                      "MatGetDiagonalBlock_MPIAIJ",
1875                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1876   *A = B;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNC__
1881 #define __FUNC__ "MatDuplicate_MPIAIJ"
1882 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1883 {
1884   Mat        mat;
1885   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1886   int        ierr, len=0, flg;
1887 
1888   PetscFunctionBegin;
1889   *newmat       = 0;
1890   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1891   PLogObjectCreate(mat);
1892   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1893   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1894   mat->ops->destroy = MatDestroy_MPIAIJ;
1895   mat->ops->view    = MatView_MPIAIJ;
1896   mat->factor       = matin->factor;
1897   mat->assembled    = PETSC_TRUE;
1898   mat->insertmode   = NOT_SET_VALUES;
1899 
1900   a->m = mat->m   = oldmat->m;
1901   a->n = mat->n   = oldmat->n;
1902   a->M = mat->M   = oldmat->M;
1903   a->N = mat->N   = oldmat->N;
1904 
1905   a->rstart       = oldmat->rstart;
1906   a->rend         = oldmat->rend;
1907   a->cstart       = oldmat->cstart;
1908   a->cend         = oldmat->cend;
1909   a->size         = oldmat->size;
1910   a->rank         = oldmat->rank;
1911   a->donotstash   = oldmat->donotstash;
1912   a->roworiented  = oldmat->roworiented;
1913   a->rowindices   = 0;
1914   a->rowvalues    = 0;
1915   a->getrowactive = PETSC_FALSE;
1916 
1917   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1918   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1919   a->cowners = a->rowners + a->size + 2;
1920   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1921   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1922   if (oldmat->colmap) {
1923 #if defined (PETSC_USE_CTABLE)
1924     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1925 #else
1926     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1927     PLogObjectMemory(mat,(a->N)*sizeof(int));
1928     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1929 #endif
1930   } else a->colmap = 0;
1931   if (oldmat->garray) {
1932     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1933     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1934     PLogObjectMemory(mat,len*sizeof(int));
1935     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1936   } else a->garray = 0;
1937 
1938   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1939   PLogObjectParent(mat,a->lvec);
1940   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1941   PLogObjectParent(mat,a->Mvctx);
1942   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1943   PLogObjectParent(mat,a->A);
1944   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1945   PLogObjectParent(mat,a->B);
1946   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1947   if (flg) {
1948     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1949   }
1950   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1951   *newmat = mat;
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #include "sys.h"
1956 
1957 #undef __FUNC__
1958 #define __FUNC__ "MatLoad_MPIAIJ"
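/*
   Loads a parallel AIJ matrix from a PETSc binary viewer.  A sketch of the file layout
   assumed by the reads below (see the PETSc binary viewer documentation for the
   authoritative description):

       int    header[4]       MAT_COOKIE, M (global rows), N (global columns),
                              total nonzeros nz (negative flags a special format)
       int    rowlengths[M]   number of nonzeros in each row
       int    cols[nz]        column indices, stored row by row
       Scalar vals[nz]        numerical values, stored row by row

   Process 0 reads the file; row lengths are distributed with MPI_Scatterv(), and each
   processor's column indices and values are shipped with MPI_Send()/MPI_Recv().
*/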
1959 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1960 {
1961   Mat          A;
1962   Scalar       *vals,*svals;
1963   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1964   MPI_Status   status;
1965   int          i, nz, ierr, j,rstart, rend, fd;
1966   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1967   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1968   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1969 
1970   PetscFunctionBegin;
1971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1972   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1973   if (!rank) {
1974     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1976     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1977     if (header[3] < 0) {
1978       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1979     }
1980   }
1981 
1982   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1983   M = header[1]; N = header[2];
1984   /* determine ownership of all rows */
1985   m = M/size + ((M % size) > rank);
1986   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1987   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1988   rowners[0] = 0;
1989   for ( i=2; i<=size; i++ ) {
1990     rowners[i] += rowners[i-1];
1991   }
1992   rstart = rowners[rank];
1993   rend   = rowners[rank+1];
1994 
1995   /* distribute row lengths to all processors */
1996   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1997   offlens = ourlens + (rend-rstart);
1998   if (!rank) {
1999     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
2000     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2001     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2002     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2003     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2004     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2005   } else {
2006     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2007   }
2008 
2009   if (!rank) {
2010     /* calculate the number of nonzeros on each processor */
2011     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2012     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2013     for ( i=0; i<size; i++ ) {
2014       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2015         procsnz[i] += rowlengths[j];
2016       }
2017     }
2018     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2019 
2020     /* determine max buffer needed and allocate it */
2021     maxnz = 0;
2022     for ( i=0; i<size; i++ ) {
2023       maxnz = PetscMax(maxnz,procsnz[i]);
2024     }
2025     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2026 
2027     /* read in my part of the matrix column indices  */
2028     nz     = procsnz[0];
2029     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2030     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2031 
2032     /* read in everyone else's part and ship it off */
2033     for ( i=1; i<size; i++ ) {
2034       nz   = procsnz[i];
2035       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2036       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2037     }
2038     ierr = PetscFree(cols);CHKERRQ(ierr);
2039   } else {
2040     /* determine buffer space needed for message */
2041     nz = 0;
2042     for ( i=0; i<m; i++ ) {
2043       nz += ourlens[i];
2044     }
2045     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2046 
2047     /* receive message of column indices */
2048     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2049     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2050     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2051   }
2052 
2053   /* determine column ownership if matrix is not square */
2054   if (N != M) {
2055     n      = N/size + ((N % size) > rank);
2056     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2057     cstart = cend - n;
2058   } else {
2059     cstart = rstart;
2060     cend   = rend;
2061     n      = cend - cstart;
2062   }
2063 
2064   /* loop over local rows, determining number of off diagonal entries */
2065   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2066   jj = 0;
2067   for ( i=0; i<m; i++ ) {
2068     for ( j=0; j<ourlens[i]; j++ ) {
2069       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2070       jj++;
2071     }
2072   }
2073 
2074   /* create our matrix */
2075   for ( i=0; i<m; i++ ) {
2076     ourlens[i] -= offlens[i];
2077   }
2078   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2079   A = *newmat;
2080   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2081   for ( i=0; i<m; i++ ) {
2082     ourlens[i] += offlens[i];
2083   }
2084 
2085   if (!rank) {
2086     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2087 
2088     /* read in my part of the matrix numerical values  */
2089     nz   = procsnz[0];
2090     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2091 
2092     /* insert into matrix */
2093     jj      = rstart;
2094     smycols = mycols;
2095     svals   = vals;
2096     for ( i=0; i<m; i++ ) {
2097       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2098       smycols += ourlens[i];
2099       svals   += ourlens[i];
2100       jj++;
2101     }
2102 
2103     /* read in other processors and ship out */
2104     for ( i=1; i<size; i++ ) {
2105       nz   = procsnz[i];
2106       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2107       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2108     }
2109     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2110   } else {
2111     /* receive numeric values */
2112     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2113 
2114     /* receive message of values */
2115     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2116     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2117     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2118 
2119     /* insert into matrix */
2120     jj      = rstart;
2121     smycols = mycols;
2122     svals   = vals;
2123     for ( i=0; i<m; i++ ) {
2124       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2125       smycols += ourlens[i];
2126       svals   += ourlens[i];
2127       jj++;
2128     }
2129   }
2130   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2131   ierr = PetscFree(vals);CHKERRQ(ierr);
2132   ierr = PetscFree(mycols);CHKERRQ(ierr);
2133   ierr = PetscFree(rowners);CHKERRQ(ierr);
2134 
2135   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNC__
2141 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2142 /*
2143     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2144   copy on each processor, and then the final result formed by concatenating the
2145   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2146 */
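/*
   A rough usage sketch of the public interface (isrow,iscol are index sets selecting the
   rows and columns to extract from an existing parallel matrix mat; csize = PETSC_DECIDE
   lets PETSc choose the local column layout of the result):

       Mat sub;
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... later, with the same index sets, refill the existing submatrix ...
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/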
2147 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2148 {
2149   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2150   Mat        *local,M, Mreuse;
2151   Scalar     *vwork,*aa;
2152   MPI_Comm   comm = mat->comm;
2153   Mat_SeqAIJ *aij;
2154   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2155 
2156   PetscFunctionBegin;
2157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2159 
2160   if (call ==  MAT_REUSE_MATRIX) {
2161     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2162     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2163     local = &Mreuse;
2164     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2165   } else {
2166     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2167     Mreuse = *local;
2168     ierr = PetscFree(local);CHKERRQ(ierr);
2169   }
2170 
2171   /*
2172       m - number of local rows
2173       n - number of columns (same on all processors)
2174       rstart - first row in new global matrix generated
2175   */
2176   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2177   if (call == MAT_INITIAL_MATRIX) {
2178     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2179     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2180     ii  = aij->i;
2181     jj  = aij->j;
2182 
2183     /*
2184         Determine the number of non-zeros in the diagonal and off-diagonal
2185         portions of the matrix in order to do correct preallocation
2186     */
2187 
2188     /* first get start and end of "diagonal" columns */
2189     if (csize == PETSC_DECIDE) {
2190       nlocal = n/size + ((n % size) > rank);
2191     } else {
2192       nlocal = csize;
2193     }
2194     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2195     rstart = rend - nlocal;
2196     if (rank == size - 1 && rend != n) {
2197       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2198     }
2199 
2200     /* next, compute all the lengths */
2201     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2202     olens = dlens + m;
2203     for ( i=0; i<m; i++ ) {
2204       jend = ii[i+1] - ii[i];
2205       olen = 0;
2206       dlen = 0;
2207       for ( j=0; j<jend; j++ ) {
2208         if ( *jj < rstart || *jj >= rend) olen++;
2209         else dlen++;
2210         jj++;
2211       }
2212       olens[i] = olen;
2213       dlens[i] = dlen;
2214     }
2215     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2216     ierr = PetscFree(dlens);CHKERRQ(ierr);
2217   } else {
2218     int ml,nl;
2219 
2220     M = *newmat;
2221     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2222     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2223     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2224     /*
2225          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2226        rather than the slower MatSetValues().
2227     */
2228     M->was_assembled = PETSC_TRUE;
2229     M->assembled     = PETSC_FALSE;
2230   }
2231   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2232   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2233   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2234   ii  = aij->i;
2235   jj  = aij->j;
2236   aa  = aij->a;
2237   for (i=0; i<m; i++) {
2238     row   = rstart + i;
2239     nz    = ii[i+1] - ii[i];
2240     cwork = jj;     jj += nz;
2241     vwork = aa;     aa += nz;
2242     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2243   }
2244 
2245   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247   *newmat = M;
2248 
2249   /* save submatrix used in processor for next request */
2250   if (call ==  MAT_INITIAL_MATRIX) {
2251     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2252     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2253   }
2254 
2255   PetscFunctionReturn(0);
2256 }
2257