xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ac2a4f0d24b3b6a4ee93edbcad41f4bb9e923944)
1 /*$Id: mpiaij.c,v 1.304 1999/10/13 20:37:20 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 extern int MatSetUpMultiply_MPIAIJ(Mat);
8 extern int DisAssemble_MPIAIJ(Mat);
9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 extern int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at a
18 slightly higher hash-table lookup cost; without it, it is not scalable (each
19 processor holds an order-N integer array) but access is fast.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
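
/*
   Illustrative sketch (not part of the compiled code): once the colmap exists,
   translating a global column index gcol (a hypothetical name) of the off-diagonal
   part into its local index in aij->B looks roughly like

      int lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          the hash table stores (local index + 1)
   #else
      lcol = aij->colmap[gcol] - 1;    order-N array indexed by the global column
   #endif

   a result of -1 means the global column is not present in the off-diagonal block,
   which is exactly the test used in MatSetValues_MPIAIJ() below.
*/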
43 
44 #define CHUNKSIZE   15
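/*
   The two macros below are hand-inlined variants of MatSetValues_SeqAIJ() for the
   diagonal block A and the off-diagonal block B: they locate the column within the
   (sorted) row, add or insert the value, and if the row is already full they grow
   the storage by CHUNKSIZE entries, copying the old i/j/a arrays into a single new
   allocation.
*/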
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=0; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = 1; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
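
/*
   Typical caller-side pattern (a sketch, not compiled here): rows owned by another
   processor are stashed above and communicated during assembly, so a caller may set
   any global entry and then assemble, e.g.

      ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/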
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change then we know that
382      no processor has disassembled, and we can skip this step
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
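/*
   MatZeroRows for MPIAIJ, in outline: each processor may list rows owned by any
   processor, so the requested row indices are first routed to their owners (message
   counts are combined with an MPI_Allreduce using PetscMaxSum_Op, then the indices
   themselves are exchanged with MPI_Isend/MPI_Irecv); each processor then zeros the
   rows it owns locally, optionally inserting the supplied diagonal value.
*/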
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
455   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
456   nrecvs = work[size+rank];
457   nmax   = work[rank];
458   ierr   = PetscFree(work);CHKERRQ(ierr);
459 
460   /* post receives:   */
461   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
462   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
463   for ( i=0; i<nrecvs; i++ ) {
464     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
465   }
466 
467   /* do sends:
468       starts[i] gives the starting index in svalues for the row indices going to
469          the ith processor
470   */
471   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
472   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
473   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
474   starts[0] = 0;
475   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
476   for ( i=0; i<N; i++ ) {
477     svalues[starts[owner[i]]++] = rows[i];
478   }
479   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
480 
481   starts[0] = 0;
482   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
483   count = 0;
484   for ( i=0; i<size; i++ ) {
485     if (procs[i]) {
486       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
487     }
488   }
489   ierr = PetscFree(starts);CHKERRQ(ierr);
490 
491   base = owners[rank];
492 
493   /*  wait on receives */
494   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
495   source = lens + nrecvs;
496   count  = nrecvs; slen = 0;
497   while (count) {
498     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
499     /* unpack receives into our local space */
500     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
501     source[imdex]  = recv_status.MPI_SOURCE;
502     lens[imdex]    = n;
503     slen          += n;
504     count--;
505   }
506   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
507 
508   /* move the data into the send scatter */
509   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
510   count = 0;
511   for ( i=0; i<nrecvs; i++ ) {
512     values = rvalues + i*nmax;
513     for ( j=0; j<lens[i]; j++ ) {
514       lrows[count++] = values[j] - base;
515     }
516   }
517   ierr = PetscFree(rvalues);CHKERRQ(ierr);
518   ierr = PetscFree(lens);CHKERRQ(ierr);
519   ierr = PetscFree(owner);CHKERRQ(ierr);
520   ierr = PetscFree(nprocs);CHKERRQ(ierr);
521 
522   /* actually zap the local rows */
523   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
524   PLogObjectParent(A,istmp);
525 
526   /*
527         Zero the required rows. If the "diagonal block" of the matrix
528      is square and the user wishes to set the diagonal we use separate
529      code so that MatSetValues() is not called for each diagonal entry, which
530      would allocate new memory, causing many mallocs and slowing things down.
531 
532        Contributed by: Mathew Knepley
533   */
534   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
535   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
536   if (diag && (l->A->M == l->A->N)) {
537     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
538   } else if (diag) {
539     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
540     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
541       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
542 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
543     }
544     for ( i = 0; i < slen; i++ ) {
545       row  = lrows[i] + rstart;
546       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
547     }
548     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550   } else {
551     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
552   }
553   ierr = ISDestroy(istmp);CHKERRQ(ierr);
554   ierr = PetscFree(lrows);CHKERRQ(ierr);
555 
556   /* wait on sends */
557   if (nsends) {
558     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
559     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
560     ierr        = PetscFree(send_status);CHKERRQ(ierr);
561   }
562   ierr = PetscFree(send_waits);CHKERRQ(ierr);
563   ierr = PetscFree(svalues);CHKERRQ(ierr);
564 
565   PetscFunctionReturn(0);
566 }
567 
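/*
   MatMult for MPIAIJ computes  y = A*x + B*xghost : the "diagonal" block A acts on
   the locally owned part of x while the off-diagonal block B acts on the ghost
   values gathered into a->lvec; the scatter filling a->lvec is started before the
   local product so that communication overlaps computation.
*/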
568 #undef __FUNC__
569 #define __FUNC__ "MatMult_MPIAIJ"
570 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
571 {
572   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
573   int        ierr,nt;
574 
575   PetscFunctionBegin;
576   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
577   if (nt != a->n) {
578     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
579   }
580   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
581   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
582   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNC__
588 #define __FUNC__ "MatMultAdd_MPIAIJ"
589 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
590 {
591   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
592   int        ierr;
593 
594   PetscFunctionBegin;
595   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
596   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
597   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 
602 #undef __FUNC__
603 #define __FUNC__ "MatMultTrans_MPIAIJ"
604 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
605 {
606   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
607   int        ierr;
608 
609   PetscFunctionBegin;
610   /* do nondiagonal part */
611   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
612   /* send it on its way */
613   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
614   /* do local part */
615   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
616   /* receive remote parts: note this assumes the values are not actually */
617   /* added into yy until the VecScatterEnd() below, which is true for this */
618   /* implementation but may not always be true. */
619   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNC__
624 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
625 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
628   int        ierr;
629 
630   PetscFunctionBegin;
631   /* do nondiagonal part */
632   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
633   /* send it on its way */
634   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   /* do local part */
636   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
637   /* receive remote parts: note this assumes the values are not actually */
638   /* added into zz until the VecScatterEnd() below, which is true for this */
639   /* implementation but may not always be true. */
640   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 /*
645   This only works correctly for square matrices where the subblock A->A is the
646    diagonal block
647 */
648 #undef __FUNC__
649 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
650 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
651 {
652   int        ierr;
653   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
654 
655   PetscFunctionBegin;
656   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
657   if (a->rstart != a->cstart || a->rend != a->cend) {
658     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
659   }
660   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNC__
665 #define __FUNC__ "MatScale_MPIAIJ"
666 int MatScale_MPIAIJ(Scalar *aa,Mat A)
667 {
668   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
669   int        ierr;
670 
671   PetscFunctionBegin;
672   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
673   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNC__
678 #define __FUNC__ "MatDestroy_MPIAIJ"
679 int MatDestroy_MPIAIJ(Mat mat)
680 {
681   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
682   int        ierr;
683 
684   PetscFunctionBegin;
685 
686   if (mat->mapping) {
687     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
688   }
689   if (mat->bmapping) {
690     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
691   }
692   if (mat->rmap) {
693     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
694   }
695   if (mat->cmap) {
696     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
697   }
698 #if defined(PETSC_USE_LOG)
699   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
700 #endif
701   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
702   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
703   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
705 #if defined (PETSC_USE_CTABLE)
706   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
707 #else
708   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
709 #endif
710   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
711   if (aij->lvec)   VecDestroy(aij->lvec);
712   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
713   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
714   ierr = PetscFree(aij);CHKERRQ(ierr);
715   PLogObjectDestroy(mat);
716   PetscHeaderDestroy(mat);
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNC__
721 #define __FUNC__ "MatView_MPIAIJ_Binary"
722 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
723 {
724   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
725   int         ierr;
726 
727   PetscFunctionBegin;
728   if (aij->size == 1) {
729     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
730   }
731   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNC__
736 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
737 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
738 {
739   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
740   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
741   int         ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size;
742   PetscTruth  isdraw,isascii;
743 
744   PetscFunctionBegin;
745   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
746   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
747   if (isascii) {
748     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
749     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
750       MatInfo info;
751       int     flg;
752       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
753       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
755       if (flg) {
756         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
757 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
758       } else {
759         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
760 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
761       }
762       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
763       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
764       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
765       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
766       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
767       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
768       PetscFunctionReturn(0);
769     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
770       PetscFunctionReturn(0);
771     }
772   } else if (isdraw) {
773     Draw       draw;
774     PetscTruth isnull;
775     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
776     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
777   }
778 
779   if (size == 1) {
780     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
781   } else {
782     /* assemble the entire matrix onto first processor. */
783     Mat         A;
784     Mat_SeqAIJ *Aloc;
785     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
786     Scalar      *a;
787 
788     if (!rank) {
789       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
790     } else {
791       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     }
793     PLogObjectParent(mat,A);
794 
795     /* copy over the A part */
796     Aloc = (Mat_SeqAIJ*) aij->A->data;
797     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
798     row = aij->rstart;
799     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
800     for ( i=0; i<m; i++ ) {
801       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
802       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
803     }
804     aj = Aloc->j;
805     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
806 
807     /* copy over the B part */
808     Aloc = (Mat_SeqAIJ*) aij->B->data;
809     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
810     row = aij->rstart;
811     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
812     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
813     for ( i=0; i<m; i++ ) {
814       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
815       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
816     }
817     ierr = PetscFree(ct);CHKERRQ(ierr);
818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     /*
821        Everyone has to call to draw the matrix since the graphics waits are
822        synchronized across all processors that share the Draw object
823     */
824     if (!rank) {
825       Viewer sviewer;
826       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
827       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
828       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
829     }
830     ierr = MatDestroy(A);CHKERRQ(ierr);
831   }
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNC__
836 #define __FUNC__ "MatView_MPIAIJ"
837 int MatView_MPIAIJ(Mat mat,Viewer viewer)
838 {
839   int        ierr;
840   PetscTruth isascii,isdraw,issocket,isbinary;
841 
842   PetscFunctionBegin;
843   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
844   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
845   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
846   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
847   if (isascii || isdraw || isbinary || issocket) {
848     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
849   } else {
850     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 /*
856     This has to provide several versions.
857 
858      2) a) use only local smoothing, updating outer values only once.
859         b) local smoothing, updating outer values each inner iteration.
860      3) colored updating of outer values between colors.
861 */
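
/*
   For reference, each local sweep below applies the usual SOR update (a sketch of
   the formula, with d_i = fshift + a_ii):

      x_i <- (1-omega)*x_i + omega*( b_i - sum_{j != i} a_ij*x_j ) / d_i

   where the row sum is split between the diagonal block A (using local values of x)
   and the off-diagonal block B (using the ghost values gathered into mat->lvec).
*/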
862 #undef __FUNC__
863 #define __FUNC__ "MatRelax_MPIAIJ"
864 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
865                            double fshift,int its,Vec xx)
866 {
867   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
868   Mat        AA = mat->A, BB = mat->B;
869   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
870   Scalar     *b,*x,*xs,*ls,d,*v,sum;
871   int        ierr,*idx, *diag;
872   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
873 
874   PetscFunctionBegin;
875   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
876   diag = A->diag;
877   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
878     if (flag & SOR_ZERO_INITIAL_GUESS) {
879       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
880       PetscFunctionReturn(0);
881     }
882     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
883     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
884     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
885     if (xx != bb) {
886       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
887     } else {
888       b = x;
889     }
890     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
891     xs = x + shift; /* shift by one for index start of 1 */
892     ls = ls + shift;
893     while (its--) {
894       /* go down through the rows */
895       for ( i=0; i<m; i++ ) {
896         n    = A->i[i+1] - A->i[i];
897 	PLogFlops(4*n+3);
898         idx  = A->j + A->i[i] + shift;
899         v    = A->a + A->i[i] + shift;
900         sum  = b[i];
901         SPARSEDENSEMDOT(sum,xs,v,idx,n);
902         d    = fshift + A->a[diag[i]+shift];
903         n    = B->i[i+1] - B->i[i];
904         idx  = B->j + B->i[i] + shift;
905         v    = B->a + B->i[i] + shift;
906         SPARSEDENSEMDOT(sum,ls,v,idx,n);
907         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
908       }
909       /* come up through the rows */
910       for ( i=m-1; i>-1; i-- ) {
911         n    = A->i[i+1] - A->i[i];
912 	PLogFlops(4*n+3);
913         idx  = A->j + A->i[i] + shift;
914         v    = A->a + A->i[i] + shift;
915         sum  = b[i];
916         SPARSEDENSEMDOT(sum,xs,v,idx,n);
917         d    = fshift + A->a[diag[i]+shift];
918         n    = B->i[i+1] - B->i[i];
919         idx  = B->j + B->i[i] + shift;
920         v    = B->a + B->i[i] + shift;
921         SPARSEDENSEMDOT(sum,ls,v,idx,n);
922         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
923       }
924     }
925     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
926     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
927     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
928   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
929     if (flag & SOR_ZERO_INITIAL_GUESS) {
930       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
931       PetscFunctionReturn(0);
932     }
933     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
934     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
935     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
936     if (xx != bb) {
937       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
938     } else {
939       b = x;
940     }
941     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
942     xs = x + shift; /* shift by one for index start of 1 */
943     ls = ls + shift;
944     while (its--) {
945       for ( i=0; i<m; i++ ) {
946         n    = A->i[i+1] - A->i[i];
947 	PLogFlops(4*n+3);
948         idx  = A->j + A->i[i] + shift;
949         v    = A->a + A->i[i] + shift;
950         sum  = b[i];
951         SPARSEDENSEMDOT(sum,xs,v,idx,n);
952         d    = fshift + A->a[diag[i]+shift];
953         n    = B->i[i+1] - B->i[i];
954         idx  = B->j + B->i[i] + shift;
955         v    = B->a + B->i[i] + shift;
956         SPARSEDENSEMDOT(sum,ls,v,idx,n);
957         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
958       }
959     }
960     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
961     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
962     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
963   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
964     if (flag & SOR_ZERO_INITIAL_GUESS) {
965       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
966       PetscFunctionReturn(0);
967     }
968     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
969     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
970     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
971     if (xx != bb) {
972       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
973     } else {
974       b = x;
975     }
976     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
977     xs = x + shift; /* shift by one for index start of 1 */
978     ls = ls + shift;
979     while (its--) {
980       for ( i=m-1; i>-1; i-- ) {
981         n    = A->i[i+1] - A->i[i];
982 	PLogFlops(4*n+3);
983         idx  = A->j + A->i[i] + shift;
984         v    = A->a + A->i[i] + shift;
985         sum  = b[i];
986         SPARSEDENSEMDOT(sum,xs,v,idx,n);
987         d    = fshift + A->a[diag[i]+shift];
988         n    = B->i[i+1] - B->i[i];
989         idx  = B->j + B->i[i] + shift;
990         v    = B->a + B->i[i] + shift;
991         SPARSEDENSEMDOT(sum,ls,v,idx,n);
992         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
993       }
994     }
995     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
996     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
997     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
998   } else {
999     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNC__
1005 #define __FUNC__ "MatGetInfo_MPIAIJ"
1006 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1007 {
1008   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1009   Mat        A = mat->A, B = mat->B;
1010   int        ierr;
1011   double     isend[5], irecv[5];
1012 
1013   PetscFunctionBegin;
1014   info->block_size     = 1.0;
1015   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1016   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1017   isend[3] = info->memory;  isend[4] = info->mallocs;
1018   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1019   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1020   isend[3] += info->memory;  isend[4] += info->mallocs;
1021   if (flag == MAT_LOCAL) {
1022     info->nz_used      = isend[0];
1023     info->nz_allocated = isend[1];
1024     info->nz_unneeded  = isend[2];
1025     info->memory       = isend[3];
1026     info->mallocs      = isend[4];
1027   } else if (flag == MAT_GLOBAL_MAX) {
1028     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1029     info->nz_used      = irecv[0];
1030     info->nz_allocated = irecv[1];
1031     info->nz_unneeded  = irecv[2];
1032     info->memory       = irecv[3];
1033     info->mallocs      = irecv[4];
1034   } else if (flag == MAT_GLOBAL_SUM) {
1035     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1036     info->nz_used      = irecv[0];
1037     info->nz_allocated = irecv[1];
1038     info->nz_unneeded  = irecv[2];
1039     info->memory       = irecv[3];
1040     info->mallocs      = irecv[4];
1041   }
1042   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1043   info->fill_ratio_needed = 0;
1044   info->factor_mallocs    = 0;
1045   info->rows_global       = (double)mat->M;
1046   info->columns_global    = (double)mat->N;
1047   info->rows_local        = (double)mat->m;
1048   info->columns_local     = (double)mat->N;
1049 
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNC__
1054 #define __FUNC__ "MatSetOption_MPIAIJ"
1055 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1056 {
1057   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1058   int        ierr;
1059 
1060   PetscFunctionBegin;
1061   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1062       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1063       op == MAT_COLUMNS_UNSORTED ||
1064       op == MAT_COLUMNS_SORTED ||
1065       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1066       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1067         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1068         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1069   } else if (op == MAT_ROW_ORIENTED) {
1070     a->roworiented = 1;
1071     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1072     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1073   } else if (op == MAT_ROWS_SORTED ||
1074              op == MAT_ROWS_UNSORTED ||
1075              op == MAT_SYMMETRIC ||
1076              op == MAT_STRUCTURALLY_SYMMETRIC ||
1077              op == MAT_YES_NEW_DIAGONALS)
1078     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1079   else if (op == MAT_COLUMN_ORIENTED) {
1080     a->roworiented = 0;
1081     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1082     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1083   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1084     a->donotstash = 1;
1085   } else if (op == MAT_NO_NEW_DIAGONALS){
1086     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1087   } else {
1088     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1089   }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNC__
1094 #define __FUNC__ "MatGetSize_MPIAIJ"
1095 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1096 {
1097   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1098 
1099   PetscFunctionBegin;
1100   if (m) *m = mat->M;
1101   if (n) *n = mat->N;
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNC__
1106 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1107 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1108 {
1109   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1110 
1111   PetscFunctionBegin;
1112   if (m) *m = mat->m;
1113   if (n) *n = mat->n;
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNC__
1118 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1119 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1120 {
1121   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1122 
1123   PetscFunctionBegin;
1124   *m = mat->rstart; *n = mat->rend;
1125   PetscFunctionReturn(0);
1126 }
1127 
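/*
   MatGetRow for MPIAIJ, in outline: the requested (locally owned) row is obtained
   from both the diagonal block A and the off-diagonal block B, the B column indices
   are mapped back to global numbering through mat->garray, and the two pieces are
   merged in increasing global column order into the work arrays mat->rowvalues and
   mat->rowindices.
*/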
1128 #undef __FUNC__
1129 #define __FUNC__ "MatGetRow_MPIAIJ"
1130 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1131 {
1132   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1133   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1134   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1135   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1136   int        *cmap, *idx_p;
1137 
1138   PetscFunctionBegin;
1139   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1140   mat->getrowactive = PETSC_TRUE;
1141 
1142   if (!mat->rowvalues && (idx || v)) {
1143     /*
1144         allocate enough space to hold information from the longest row.
1145     */
1146     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1147     int     max = 1,m = mat->m,tmp;
1148     for ( i=0; i<m; i++ ) {
1149       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1150       if (max < tmp) { max = tmp; }
1151     }
1152     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1153     mat->rowindices = (int *) (mat->rowvalues + max);
1154   }
1155 
1156   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1157   lrow = row - rstart;
1158 
1159   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1160   if (!v)   {pvA = 0; pvB = 0;}
1161   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1162   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1163   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1164   nztot = nzA + nzB;
1165 
1166   cmap  = mat->garray;
1167   if (v  || idx) {
1168     if (nztot) {
1169       /* Sort by increasing column numbers, assuming A and B already sorted */
1170       int imark = -1;
1171       if (v) {
1172         *v = v_p = mat->rowvalues;
1173         for ( i=0; i<nzB; i++ ) {
1174           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1175           else break;
1176         }
1177         imark = i;
1178         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1179         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1180       }
1181       if (idx) {
1182         *idx = idx_p = mat->rowindices;
1183         if (imark > -1) {
1184           for ( i=0; i<imark; i++ ) {
1185             idx_p[i] = cmap[cworkB[i]];
1186           }
1187         } else {
1188           for ( i=0; i<nzB; i++ ) {
1189             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1190             else break;
1191           }
1192           imark = i;
1193         }
1194         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1195         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1196       }
1197     } else {
1198       if (idx) *idx = 0;
1199       if (v)   *v   = 0;
1200     }
1201   }
1202   *nz = nztot;
1203   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1204   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNC__
1209 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1210 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1211 {
1212   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1213 
1214   PetscFunctionBegin;
1215   if (aij->getrowactive == PETSC_FALSE) {
1216     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1217   }
1218   aij->getrowactive = PETSC_FALSE;
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatNorm_MPIAIJ"
1224 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1225 {
1226   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1227   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1228   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1229   double     sum = 0.0;
1230   Scalar     *v;
1231 
1232   PetscFunctionBegin;
1233   if (aij->size == 1) {
1234     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1235   } else {
1236     if (type == NORM_FROBENIUS) {
1237       v = amat->a;
1238       for (i=0; i<amat->nz; i++ ) {
1239 #if defined(PETSC_USE_COMPLEX)
1240         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1241 #else
1242         sum += (*v)*(*v); v++;
1243 #endif
1244       }
1245       v = bmat->a;
1246       for (i=0; i<bmat->nz; i++ ) {
1247 #if defined(PETSC_USE_COMPLEX)
1248         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1249 #else
1250         sum += (*v)*(*v); v++;
1251 #endif
1252       }
1253       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1254       *norm = sqrt(*norm);
1255     } else if (type == NORM_1) { /* max column norm */
1256       double *tmp, *tmp2;
1257       int    *jj, *garray = aij->garray;
1258       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1259       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1260       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1261       *norm = 0.0;
1262       v = amat->a; jj = amat->j;
1263       for ( j=0; j<amat->nz; j++ ) {
1264         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1265       }
1266       v = bmat->a; jj = bmat->j;
1267       for ( j=0; j<bmat->nz; j++ ) {
1268         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1269       }
1270       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1271       for ( j=0; j<aij->N; j++ ) {
1272         if (tmp2[j] > *norm) *norm = tmp2[j];
1273       }
1274       ierr = PetscFree(tmp);CHKERRQ(ierr);
1275       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1276     } else if (type == NORM_INFINITY) { /* max row norm */
1277       double ntemp = 0.0;
1278       for ( j=0; j<amat->m; j++ ) {
1279         v = amat->a + amat->i[j] + shift;
1280         sum = 0.0;
1281         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1282           sum += PetscAbsScalar(*v); v++;
1283         }
1284         v = bmat->a + bmat->i[j] + shift;
1285         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1286           sum += PetscAbsScalar(*v); v++;
1287         }
1288         if (sum > ntemp) ntemp = sum;
1289       }
1290       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1291     } else {
1292       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1293     }
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNC__
1299 #define __FUNC__ "MatTranspose_MPIAIJ"
1300 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1301 {
1302   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1303   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1304   int        ierr,shift = Aloc->indexshift;
1305   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1306   Mat        B;
1307   Scalar     *array;
1308 
1309   PetscFunctionBegin;
1310   if (matout == PETSC_NULL && M != N) {
1311     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1312   }
1313 
1314   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1315 
1316   /* copy over the A part */
1317   Aloc = (Mat_SeqAIJ*) a->A->data;
1318   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1319   row = a->rstart;
1320   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1321   for ( i=0; i<m; i++ ) {
1322     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1323     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1324   }
1325   aj = Aloc->j;
1326   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1327 
1328   /* copy over the B part */
1329   Aloc = (Mat_SeqAIJ*) a->B->data;
1330   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1331   row = a->rstart;
1332   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1333   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1334   for ( i=0; i<m; i++ ) {
1335     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1336     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1337   }
1338   ierr = PetscFree(ct);CHKERRQ(ierr);
1339   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1341   if (matout != PETSC_NULL) {
1342     *matout = B;
1343   } else {
1344     PetscOps       *Abops;
1345     struct _MatOps *Aops;
1346 
1347     /* This isn't really an in-place transpose, but we free the data structures of a */
1348     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1349     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1350     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1351 #if defined (PETSC_USE_CTABLE)
1352     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1353 #else
1354     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1355 #endif
1356     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1357     if (a->lvec) VecDestroy(a->lvec);
1358     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1359     ierr = PetscFree(a);CHKERRQ(ierr);
1360 
1361     /*
1362        This is horrible, horrible code. We need to keep the
1363       A pointers for the bops and ops but copy everything
1364       else from B.
1365     */
1366     Abops   = A->bops;
1367     Aops    = A->ops;
1368     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1369     A->bops = Abops;
1370     A->ops  = Aops;
1371     PetscHeaderDestroy(B);
1372   }
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNC__
1377 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1378 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1379 {
1380   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1381   Mat        a = aij->A, b = aij->B;
1382   int        ierr,s1,s2,s3;
1383 
1384   PetscFunctionBegin;
1385   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1386   if (rr) {
1387     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1388     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1389     /* Overlap communication with computation. */
1390     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1391   }
1392   if (ll) {
1393     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1394     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1395     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1396   }
1397   /* scale  the diagonal block */
1398   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1399 
1400   if (rr) {
1401     /* Do a scatter end and then right scale the off-diagonal block */
1402     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1403     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1404   }
1405 
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 
1410 #undef __FUNC__
1411 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1412 int MatPrintHelp_MPIAIJ(Mat A)
1413 {
1414   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1415   int        ierr;
1416 
1417   PetscFunctionBegin;
1418   if (!a->rank) {
1419     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNC__
1425 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1426 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1427 {
1428   PetscFunctionBegin;
1429   *bs = 1;
1430   PetscFunctionReturn(0);
1431 }
1432 #undef __FUNC__
1433 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1434 int MatSetUnfactored_MPIAIJ(Mat A)
1435 {
1436   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1437   int        ierr;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNC__
1445 #define __FUNC__ "MatEqual_MPIAIJ"
1446 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1447 {
1448   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1449   Mat        a, b, c, d;
1450   PetscTruth flg;
1451   int        ierr;
1452 
1453   PetscFunctionBegin;
1454   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1455   a = matA->A; b = matA->B;
1456   c = matB->A; d = matB->B;
1457 
1458   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1459   if (flg == PETSC_TRUE) {
1460     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1461   }
1462   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNC__
1467 #define __FUNC__ "MatCopy_MPIAIJ"
1468 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1469 {
1470   int        ierr;
1471   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1472   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1473 
1474   PetscFunctionBegin;
1475   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1476     /* Because of the column compression in the off-processor part of the matrix a->B,
1477        the number of columns in a->B and b->B may be different; hence we cannot call
1478        MatCopy() directly on the two parts. If need be, we can provide a more
1479        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1480        and then copying the submatrices */
1481     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1482   } else {
1483     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1484     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1485   }
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1490 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1491 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1492 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1493 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1494 
1495 /* -------------------------------------------------------------------*/
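/*
   Function table for the MPIAIJ matrix type. A 0 entry means that the
   corresponding Mat operation is not provided by this implementation.
*/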
1496 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1497        MatGetRow_MPIAIJ,
1498        MatRestoreRow_MPIAIJ,
1499        MatMult_MPIAIJ,
1500        MatMultAdd_MPIAIJ,
1501        MatMultTrans_MPIAIJ,
1502        MatMultTransAdd_MPIAIJ,
1503        0,
1504        0,
1505        0,
1506        0,
1507        0,
1508        0,
1509        MatRelax_MPIAIJ,
1510        MatTranspose_MPIAIJ,
1511        MatGetInfo_MPIAIJ,
1512        MatEqual_MPIAIJ,
1513        MatGetDiagonal_MPIAIJ,
1514        MatDiagonalScale_MPIAIJ,
1515        MatNorm_MPIAIJ,
1516        MatAssemblyBegin_MPIAIJ,
1517        MatAssemblyEnd_MPIAIJ,
1518        0,
1519        MatSetOption_MPIAIJ,
1520        MatZeroEntries_MPIAIJ,
1521        MatZeroRows_MPIAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526        MatGetSize_MPIAIJ,
1527        MatGetLocalSize_MPIAIJ,
1528        MatGetOwnershipRange_MPIAIJ,
1529        0,
1530        0,
1531        0,
1532        0,
1533        MatDuplicate_MPIAIJ,
1534        0,
1535        0,
1536        0,
1537        0,
1538        0,
1539        MatGetSubMatrices_MPIAIJ,
1540        MatIncreaseOverlap_MPIAIJ,
1541        MatGetValues_MPIAIJ,
1542        MatCopy_MPIAIJ,
1543        MatPrintHelp_MPIAIJ,
1544        MatScale_MPIAIJ,
1545        0,
1546        0,
1547        0,
1548        MatGetBlockSize_MPIAIJ,
1549        0,
1550        0,
1551        0,
1552        0,
1553        MatFDColoringCreate_MPIAIJ,
1554        0,
1555        MatSetUnfactored_MPIAIJ,
1556        0,
1557        0,
1558        MatGetSubMatrix_MPIAIJ,
1559        0,
1560        0,
1561        MatGetMaps_Petsc};
1562 
1563 /* ----------------------------------------------------------------------------------------*/
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNC__
1567 #define __FUNC__ "MatStoreValues_MPIAIJ"
1568 int MatStoreValues_MPIAIJ(Mat mat)
1569 {
1570   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1571   int        ierr;
1572 
1573   PetscFunctionBegin;
1574   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1575   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNC__
1582 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1583 int MatRetrieveValues_MPIAIJ(Mat mat)
1584 {
1585   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1586   int        ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1590   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
1594 
1595 #include "pc.h"
1596 EXTERN_C_BEGIN
1597 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1598 EXTERN_C_END
1599 
1600 #undef __FUNC__
1601 #define __FUNC__ "MatCreateMPIAIJ"
1602 /*@C
1603    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1604    (the default parallel PETSc format).  For good matrix assembly performance
1605    the user should preallocate the matrix storage by setting the parameters
1606    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1607    performance can be increased by more than a factor of 50.
1608 
1609    Collective on MPI_Comm
1610 
1611    Input Parameters:
1612 +  comm - MPI communicator
1613 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1614            This value should be the same as the local size used in creating the
1615            y vector for the matrix-vector product y = Ax.
1616 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1617        This value should be the same as the local size used in creating the
1618        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1619 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1620 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1621 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1622            (same value is used for all local rows)
1623 .  d_nnz - array containing the number of nonzeros in the various rows of the
1624            DIAGONAL portion of the local submatrix (possibly different for each row)
1625            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1626            The size of this array is equal to the number of local rows, i.e., 'm'.
1627            You must leave room for the diagonal entry even if it is zero.
1628 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1629            submatrix (same value is used for all local rows).
1630 -  o_nnz - array containing the number of nonzeros in the various rows of the
1631            OFF-DIAGONAL portion of the local submatrix (possibly different for
1632            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1633            structure. The size of this array is equal to the number
1634            of local rows, i.e., 'm'.
1635 
1636    Output Parameter:
1637 .  A - the matrix
1638 
1639    Notes:
1640    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1641    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1642    storage requirements for this matrix.
1643 
1644    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1645    processor then it must be used on all processors that share the object for
1646    that argument.
1647 
1648    The AIJ format (also called the Yale sparse matrix format or
1649    compressed row storage), is fully compatible with standard Fortran 77
1650    storage.  That is, the stored row and column indices can begin at
1651    either one (as in Fortran) or zero.  See the users manual for details.
1652 
1653    The user MUST specify either the local or global matrix dimensions
1654    (possibly both).
1655 
1656    The parallel matrix is partitioned such that the first m0 rows belong to
1657    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1658    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
1659 
1660    The DIAGONAL portion of the local submatrix of a processor can be defined
1661    as the submatrix which is obtained by extracting the part corresponding
1662    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1663    first row that belongs to the processor, and r2 is the last row belonging
1664    to this processor. This is a square mxm matrix. The remaining m x (N-n)
1665    portion of the local submatrix constitutes the OFF-DIAGONAL portion.
1666 
1667    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
1668 
1669    By default, this format uses inodes (identical nodes) when possible.
1670    We search for consecutive rows with the same nonzero structure, thereby
1671    reusing matrix information to achieve increased efficiency.
1672 
1673    Options Database Keys:
1674 +  -mat_aij_no_inode  - Do not use inodes
1675 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1676 .  -mat_aij_oneindex - Internally use indexing starting at 1
1677         rather than 0.  Note that when calling MatSetValues(),
1678         the user still MUST index entries starting at 0!
1679 -  -mat_mpi - use the parallel matrix data structures even on one processor
1680               (defaults to using SeqAIJ format on one processor)
1683 
1684 
1685    Example usage:
1686 
1687    Consider the following 8x8 matrix with 34 non-zero values, that is
1688    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1689    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1690    as follows:
1691 
1692 .vb
1693             1  2  0  |  0  3  0  |  0  4
1694     Proc0   0  5  6  |  7  0  0  |  8  0
1695             9  0 10  | 11  0  0  | 12  0
1696     -------------------------------------
1697            13  0 14  | 15 16 17  |  0  0
1698     Proc1   0 18  0  | 19 20 21  |  0  0
1699             0  0  0  | 22 23  0  | 24  0
1700     -------------------------------------
1701     Proc2  25 26 27  |  0  0 28  | 29  0
1702            30  0  0  | 31 32 33  |  0 34
1703 .ve
1704 
1705    This can be represented as a collection of submatrices as:
1706 
1707 .vb
1708       A B C
1709       D E F
1710       G H I
1711 .ve
1712 
1713    Where the submatrices A,B,C are owned by proc0, D,E,F are
1714    owned by proc1, G,H,I are owned by proc2.
1715 
1716    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1717    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1718    The 'M','N' parameters are 8,8, and have the same values on all procs.
1719 
1720    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1721    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1722    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1723    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1724    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1725    matrix, and [DF] as another SeqAIJ matrix.
1726 
1727    When d_nz, o_nz parameters are specified, d_nz storage elements are
1728    allocated for every row of the local diagonal submatrix, and o_nz
1729    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1730    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in any
1731    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1732    In this case, the values of d_nz,o_nz are:
1733 .vb
1734      proc0 : d_nz = 2, o_nz = 2
1735      proc1 : d_nz = 3, o_nz = 2
1736      proc2 : d_nz = 1, o_nz = 4
1737 .ve
1738    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1739    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1740    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1741    34 values.
1742 
1743    When d_nnz, o_nnz parameters are specified, the storage is specified
1744    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1745    In the above case the values for d_nnz,o_nnz are:
1746 .vb
1747      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1748      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1749      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1750 .ve
1751    Here the space allocated is the sum of all the above values, i.e., 34, and
1752    hence pre-allocation is perfect.
1753 
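   As a minimal sketch (the array names dnnz and onnz are illustrative only),
   the corresponding call on proc1 of the example above could look like:

.vb
     int dnnz[3] = {3,3,2}, onnz[3] = {2,1,1};
     Mat C;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,dnnz,0,onnz,&C);CHKERRQ(ierr);
.ve
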
1754    Level: intermediate
1755 
1756 .keywords: matrix, aij, compressed row, sparse, parallel
1757 
1758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1759 @*/
1760 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1761 {
1762   Mat          B;
1763   Mat_MPIAIJ   *b;
1764   int          ierr, i,size,flag1 = 0, flag2 = 0;
1765 
1766   PetscFunctionBegin;
1767 
1768   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1769   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1770   if (d_nnz) {
1771     for (i=0; i<m; i++) {
1772       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1773     }
1774   }
1775   if (o_nnz) {
1776     for (i=0; i<m; i++) {
1777       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1778     }
1779   }
1780 
1781   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1782   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1783   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1784   if (!flag1 && !flag2 && size == 1) {
1785     if (M == PETSC_DECIDE) M = m;
1786     if (N == PETSC_DECIDE) N = n;
1787     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1788     PetscFunctionReturn(0);
1789   }
1790 
1791   *A = 0;
1792   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1793   PLogObjectCreate(B);
1794   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1795   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1796   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1797   B->ops->destroy = MatDestroy_MPIAIJ;
1798   B->ops->view    = MatView_MPIAIJ;
1799   B->factor       = 0;
1800   B->assembled    = PETSC_FALSE;
1801   B->mapping      = 0;
1802 
1803   B->insertmode      = NOT_SET_VALUES;
1804   b->size            = size;
1805   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1806 
1807   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1808     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1809   }
1810 
1811   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1812   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1813   b->m = m; B->m = m;
1814   b->n = n; B->n = n;
1815   b->N = N; B->N = N;
1816   b->M = M; B->M = M;
1817 
1818   /* the information in the maps duplicates the information computed below, eventually
1819      we should remove the duplicate information that is not contained in the maps */
1820   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1821   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1822 
1823   /* build local table of row and column ownerships */
1824   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1825   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1826   b->cowners = b->rowners + b->size + 2;
1827   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1828   b->rowners[0] = 0;
1829   for ( i=2; i<=b->size; i++ ) {
1830     b->rowners[i] += b->rowners[i-1];
1831   }
1832   b->rstart = b->rowners[b->rank];
1833   b->rend   = b->rowners[b->rank+1];
1834   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1835   b->cowners[0] = 0;
1836   for ( i=2; i<=b->size; i++ ) {
1837     b->cowners[i] += b->cowners[i-1];
1838   }
1839   b->cstart = b->cowners[b->rank];
1840   b->cend   = b->cowners[b->rank+1];
1841 
1842   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1843   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1844   PLogObjectParent(B,b->A);
1845   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1846   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1847   PLogObjectParent(B,b->B);
1848 
1849   /* build cache (stash) for off-processor entries formed during assembly */
1850   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1851   b->donotstash  = 0;
1852   b->colmap      = 0;
1853   b->garray      = 0;
1854   b->roworiented = 1;
1855 
1856   /* stuff used for matrix vector multiply */
1857   b->lvec      = 0;
1858   b->Mvctx     = 0;
1859 
1860   /* stuff for MatGetRow() */
1861   b->rowindices   = 0;
1862   b->rowvalues    = 0;
1863   b->getrowactive = PETSC_FALSE;
1864 
1865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1866                                      "MatStoreValues_MPIAIJ",
1867                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1868   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1869                                      "MatRetrieveValues_MPIAIJ",
1870                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1872                                      "MatGetDiagonalBlock_MPIAIJ",
1873                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1874   *A = B;
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNC__
1879 #define __FUNC__ "MatDuplicate_MPIAIJ"
1880 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1881 {
1882   Mat        mat;
1883   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1884   int        ierr, len=0, flg;
1885 
1886   PetscFunctionBegin;
1887   *newmat       = 0;
1888   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1889   PLogObjectCreate(mat);
1890   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1891   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1892   mat->ops->destroy = MatDestroy_MPIAIJ;
1893   mat->ops->view    = MatView_MPIAIJ;
1894   mat->factor       = matin->factor;
1895   mat->assembled    = PETSC_TRUE;
1896   mat->insertmode   = NOT_SET_VALUES;
1897 
1898   a->m = mat->m   = oldmat->m;
1899   a->n = mat->n   = oldmat->n;
1900   a->M = mat->M   = oldmat->M;
1901   a->N = mat->N   = oldmat->N;
1902 
1903   a->rstart       = oldmat->rstart;
1904   a->rend         = oldmat->rend;
1905   a->cstart       = oldmat->cstart;
1906   a->cend         = oldmat->cend;
1907   a->size         = oldmat->size;
1908   a->rank         = oldmat->rank;
1909   a->donotstash   = oldmat->donotstash;
1910   a->roworiented  = oldmat->roworiented;
1911   a->rowindices   = 0;
1912   a->rowvalues    = 0;
1913   a->getrowactive = PETSC_FALSE;
1914 
1915   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1916   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1917   a->cowners = a->rowners + a->size + 2;
1918   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1919   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1920   if (oldmat->colmap) {
1921 #if defined (PETSC_USE_CTABLE)
1922     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1923 #else
1924     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1925     PLogObjectMemory(mat,(a->N)*sizeof(int));
1926     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1927 #endif
1928   } else a->colmap = 0;
1929   if (oldmat->garray) {
1930     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1931     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1932     PLogObjectMemory(mat,len*sizeof(int));
1933     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1934   } else a->garray = 0;
1935 
1936   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1937   PLogObjectParent(mat,a->lvec);
1938   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1939   PLogObjectParent(mat,a->Mvctx);
1940   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1941   PLogObjectParent(mat,a->A);
1942   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1943   PLogObjectParent(mat,a->B);
1944   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1945   if (flg) {
1946     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1947   }
1948   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1949   *newmat = mat;
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #include "sys.h"
1954 
1955 #undef __FUNC__
1956 #define __FUNC__ "MatLoad_MPIAIJ"
1957 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1958 {
1959   Mat          A;
1960   Scalar       *vals,*svals;
1961   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1962   MPI_Status   status;
1963   int          i, nz, ierr, j,rstart, rend, fd;
1964   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1965   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1966   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1967 
1968   PetscFunctionBegin;
1969   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1970   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1971   if (!rank) {
1972     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
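    /* binary header is [cookie, M, N, total nonzeros]; a negative nonzero count flags a special on-disk format */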
1973     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1974     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1975     if (header[3] < 0) {
1976       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1977     }
1978   }
1979 
1980   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1981   M = header[1]; N = header[2];
1982   /* determine ownership of all rows */
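  /* each process gets M/size rows; the first (M % size) processes get one extra row */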
1983   m = M/size + ((M % size) > rank);
1984   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1985   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1986   rowners[0] = 0;
1987   for ( i=2; i<=size; i++ ) {
1988     rowners[i] += rowners[i-1];
1989   }
1990   rstart = rowners[rank];
1991   rend   = rowners[rank+1];
1992 
1993   /* distribute row lengths to all processors */
1994   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1995   offlens = ourlens + (rend-rstart);
1996   if (!rank) {
1997     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1998     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1999     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2000     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2001     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2002     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2003   } else {
2004     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2005   }
2006 
2007   if (!rank) {
2008     /* calculate the number of nonzeros on each processor */
2009     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2010     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2011     for ( i=0; i<size; i++ ) {
2012       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2013         procsnz[i] += rowlengths[j];
2014       }
2015     }
2016     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2017 
2018     /* determine max buffer needed and allocate it */
2019     maxnz = 0;
2020     for ( i=0; i<size; i++ ) {
2021       maxnz = PetscMax(maxnz,procsnz[i]);
2022     }
2023     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2024 
2025     /* read in my part of the matrix column indices  */
2026     nz     = procsnz[0];
2027     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2028     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2029 
2030     /* read in everyone else's parts and ship them off */
2031     for ( i=1; i<size; i++ ) {
2032       nz   = procsnz[i];
2033       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2034       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2035     }
2036     ierr = PetscFree(cols);CHKERRQ(ierr);
2037   } else {
2038     /* determine buffer space needed for message */
2039     nz = 0;
2040     for ( i=0; i<m; i++ ) {
2041       nz += ourlens[i];
2042     }
2043     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2044 
2045     /* receive message of column indices */
2046     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2047     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2048     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2049   }
2050 
2051   /* determine column ownership if matrix is not square */
2052   if (N != M) {
2053     n      = N/size + ((N % size) > rank);
2054     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2055     cstart = cend - n;
2056   } else {
2057     cstart = rstart;
2058     cend   = rend;
2059     n      = cend - cstart;
2060   }
2061 
2062   /* loop over local rows, determining number of off diagonal entries */
2063   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2064   jj = 0;
2065   for ( i=0; i<m; i++ ) {
2066     for ( j=0; j<ourlens[i]; j++ ) {
2067       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2068       jj++;
2069     }
2070   }
2071 
2072   /* create our matrix */
2073   for ( i=0; i<m; i++ ) {
2074     ourlens[i] -= offlens[i];
2075   }
2076   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2077   A = *newmat;
2078   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2079   for ( i=0; i<m; i++ ) {
2080     ourlens[i] += offlens[i];
2081   }
2082 
2083   if (!rank) {
2084     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2085 
2086     /* read in my part of the matrix numerical values  */
2087     nz   = procsnz[0];
2088     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2089 
2090     /* insert into matrix */
2091     jj      = rstart;
2092     smycols = mycols;
2093     svals   = vals;
2094     for ( i=0; i<m; i++ ) {
2095       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2096       smycols += ourlens[i];
2097       svals   += ourlens[i];
2098       jj++;
2099     }
2100 
2101     /* read in other processors and ship out */
2102     for ( i=1; i<size; i++ ) {
2103       nz   = procsnz[i];
2104       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2105       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2106     }
2107     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2108   } else {
2109     /* receive numeric values */
2110     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2111 
2112     /* receive message of values */
2113     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2114     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2115     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2116 
2117     /* insert into matrix */
2118     jj      = rstart;
2119     smycols = mycols;
2120     svals   = vals;
2121     for ( i=0; i<m; i++ ) {
2122       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2123       smycols += ourlens[i];
2124       svals   += ourlens[i];
2125       jj++;
2126     }
2127   }
2128   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2129   ierr = PetscFree(vals);CHKERRQ(ierr);
2130   ierr = PetscFree(mycols);CHKERRQ(ierr);
2131   ierr = PetscFree(rowners);CHKERRQ(ierr);
2132 
2133   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #undef __FUNC__
2139 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2140 /*
2141     Not great since it makes two copies of the submatrix: first a sequential (SeqAIJ)
2142   copy on each process, and then the final result assembled by concatenating the
2143   local matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2144 */
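/*
   A minimal calling sketch (the index sets isrow and iscol are hypothetical and must
   be created by the caller, e.g. with ISCreateStride()):

      Mat sub;
      ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/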
2145 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2146 {
2147   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2148   Mat        *local,M, Mreuse;
2149   Scalar     *vwork,*aa;
2150   MPI_Comm   comm = mat->comm;
2151   Mat_SeqAIJ *aij;
2152   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2153 
2154   PetscFunctionBegin;
2155   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2157 
2158   if (call ==  MAT_REUSE_MATRIX) {
2159     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2160     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2161     local = &Mreuse;
2162     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2163   } else {
2164     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2165     Mreuse = *local;
2166     ierr = PetscFree(local);CHKERRQ(ierr);
2167   }
2168 
2169   /*
2170       m - number of local rows
2171       n - number of columns (same on all processors)
2172       rstart - first row in new global matrix generated
2173   */
2174   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2175   if (call == MAT_INITIAL_MATRIX) {
2176     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2177     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2178     ii  = aij->i;
2179     jj  = aij->j;
2180 
2181     /*
2182         Determine the number of non-zeros in the diagonal and off-diagonal
2183         portions of the matrix in order to do correct preallocation
2184     */
2185 
2186     /* first get start and end of "diagonal" columns */
2187     if (csize == PETSC_DECIDE) {
2188       nlocal = n/size + ((n % size) > rank);
2189     } else {
2190       nlocal = csize;
2191     }
2192     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2193     rstart = rend - nlocal;
2194     if (rank == size - 1 && rend != n) {
2195       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2196     }
2197 
2198     /* next, compute all the lengths */
2199     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2200     olens = dlens + m;
2201     for ( i=0; i<m; i++ ) {
2202       jend = ii[i+1] - ii[i];
2203       olen = 0;
2204       dlen = 0;
2205       for ( j=0; j<jend; j++ ) {
2206         if ( *jj < rstart || *jj >= rend) olen++;
2207         else dlen++;
2208         jj++;
2209       }
2210       olens[i] = olen;
2211       dlens[i] = dlen;
2212     }
2213     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2214     ierr = PetscFree(dlens);CHKERRQ(ierr);
2215   } else {
2216     int ml,nl;
2217 
2218     M = *newmat;
2219     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2220     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2221     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2222     /*
2223          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2224        rather than the slower MatSetValues().
2225     */
2226     M->was_assembled = PETSC_TRUE;
2227     M->assembled     = PETSC_FALSE;
2228   }
2229   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2230   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2231   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2232   ii  = aij->i;
2233   jj  = aij->j;
2234   aa  = aij->a;
2235   for (i=0; i<m; i++) {
2236     row   = rstart + i;
2237     nz    = ii[i+1] - ii[i];
2238     cwork = jj;     jj += nz;
2239     vwork = aa;     aa += nz;
2240     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2241   }
2242 
2243   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2244   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2245   *newmat = M;
2246 
2247   /* save the submatrix used on this processor so it can be reused by a later call */
2248   if (call ==  MAT_INITIAL_MATRIX) {
2249     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2250     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2251   }
2252 
2253   PetscFunctionReturn(0);
2254 }
2255