xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0f5bd95cca42d693ada6d048329ab39533680180)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.303 1999/10/04 18:51:08 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /*
17   Local utility routine that creates a mapping from the global column
18 number to the local number in the off-diagonal part of the local
19 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
20 a slightly higher hash table cost; without it it is not scalable (each processor
21 has an order N integer array but is fast to acess.
22 */
23 #undef __FUNC__
24 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
25 int CreateColmap_MPIAIJ_Private(Mat mat)
26 {
27   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
28   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
29   int        n = B->n,i,ierr;
30 
31   PetscFunctionBegin;
32 #if defined (PETSC_USE_CTABLE)
33   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
34   for ( i=0; i<n; i++ ){
35     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
36   }
37 #else
38   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
39   PLogObjectMemory(mat,aij->N*sizeof(int));
40   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
41   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
42 #endif
43   PetscFunctionReturn(0);
44 }
45 
46 #define CHUNKSIZE   15
47 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
48 { \
49  \
50     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
51     rmax = aimax[row]; nrow = ailen[row];  \
52     col1 = col - shift; \
53      \
54     low = 0; high = nrow; \
55     while (high-low > 5) { \
56       t = (low+high)/2; \
57       if (rp[t] > col) high = t; \
58       else             low  = t; \
59     } \
60       for ( _i=0; _i<nrow; _i++ ) { \
61         if (rp[_i] > col1) break; \
62         if (rp[_i] == col1) { \
63           if (addv == ADD_VALUES) ap[_i] += value;   \
64           else                  ap[_i] = value; \
65           goto a_noinsert; \
66         } \
67       }  \
68       if (nonew == 1) goto a_noinsert; \
69       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
70       if (nrow >= rmax) { \
71         /* there is no extra room in row, therefore enlarge */ \
72         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
73         Scalar *new_a; \
74  \
75         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
76  \
77         /* malloc new storage space */ \
78         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
79         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
80         new_j   = (int *) (new_a + new_nz); \
81         new_i   = new_j + new_nz; \
82  \
83         /* copy over old data into new slots */ \
84         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
85         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
86         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
87         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
88         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
89                                                            len*sizeof(int));CHKERRQ(ierr); \
90         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
91         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
92                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
93         /* free up old matrix storage */ \
94  \
95         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
96         if (!a->singlemalloc) { \
97            ierr = PetscFree(a->i);CHKERRQ(ierr); \
98            ierr = PetscFree(a->j);CHKERRQ(ierr); \
99         } \
100         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
101         a->singlemalloc = 1; \
102  \
103         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
104         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
105         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
106         a->maxnz += CHUNKSIZE; \
107         a->reallocs++; \
108       } \
109       N = nrow++ - 1; a->nz++; \
110       /* shift up all the later entries in this row */ \
111       for ( ii=N; ii>=_i; ii-- ) { \
112         rp[ii+1] = rp[ii]; \
113         ap[ii+1] = ap[ii]; \
114       } \
115       rp[_i] = col1;  \
116       ap[_i] = value;  \
117       a_noinsert: ; \
118       ailen[row] = nrow; \
119 }
120 
121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
122 { \
123  \
124     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
125     rmax = bimax[row]; nrow = bilen[row];  \
126     col1 = col - shift; \
127      \
128     low = 0; high = nrow; \
129     while (high-low > 5) { \
130       t = (low+high)/2; \
131       if (rp[t] > col) high = t; \
132       else             low  = t; \
133     } \
134        for ( _i=0; _i<nrow; _i++ ) { \
135         if (rp[_i] > col1) break; \
136         if (rp[_i] == col1) { \
137           if (addv == ADD_VALUES) ap[_i] += value;   \
138           else                  ap[_i] = value; \
139           goto b_noinsert; \
140         } \
141       }  \
142       if (nonew == 1) goto b_noinsert; \
143       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
144       if (nrow >= rmax) { \
145         /* there is no extra room in row, therefore enlarge */ \
146         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
147         Scalar *new_a; \
148  \
149         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
150  \
151         /* malloc new storage space */ \
152         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
153         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
154         new_j   = (int *) (new_a + new_nz); \
155         new_i   = new_j + new_nz; \
156  \
157         /* copy over old data into new slots */ \
158         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
159         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
160         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
161         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
162         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
163                                                            len*sizeof(int));CHKERRQ(ierr); \
164         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
165         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
166                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
167         /* free up old matrix storage */ \
168  \
169         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
170         if (!b->singlemalloc) { \
171           ierr = PetscFree(b->i);CHKERRQ(ierr); \
172           ierr = PetscFree(b->j);CHKERRQ(ierr); \
173         } \
174         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
175         b->singlemalloc = 1; \
176  \
177         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
178         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
179         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
180         b->maxnz += CHUNKSIZE; \
181         b->reallocs++; \
182       } \
183       N = nrow++ - 1; b->nz++; \
184       /* shift up all the later entries in this row */ \
185       for ( ii=N; ii>=_i; ii-- ) { \
186         rp[ii+1] = rp[ii]; \
187         ap[ii+1] = ap[ii]; \
188       } \
189       rp[_i] = col1;  \
190       ap[_i] = value;  \
191       b_noinsert: ; \
192       bilen[row] = nrow; \
193 }
194 
195 #undef __FUNC__
196 #define __FUNC__ "MatSetValues_MPIAIJ"
197 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
198 {
199   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
200   Scalar     value;
201   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
202   int        cstart = aij->cstart, cend = aij->cend,row,col;
203   int        roworiented = aij->roworiented;
204 
205   /* Some Variables required in the macro */
206   Mat        A = aij->A;
207   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
208   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
209   Scalar     *aa = a->a;
210 
211   Mat        B = aij->B;
212   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
213   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
214   Scalar     *ba = b->a;
215 
216   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
217   int        nonew = a->nonew,shift = a->indexshift;
218   Scalar     *ap;
219 
220   PetscFunctionBegin;
221   for ( i=0; i<m; i++ ) {
222     if (im[i] < 0) continue;
223 #if defined(PETSC_USE_BOPT_g)
224     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
225 #endif
226     if (im[i] >= rstart && im[i] < rend) {
227       row = im[i] - rstart;
228       for ( j=0; j<n; j++ ) {
229         if (in[j] >= cstart && in[j] < cend){
230           col = in[j] - cstart;
231           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
232           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         }
235         else if (in[j] < 0) continue;
236 #if defined(PETSC_USE_BOPT_g)
237         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
238 #endif
239         else {
240           if (mat->was_assembled) {
241             if (!aij->colmap) {
242               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
243             }
244 #if defined (PETSC_USE_CTABLE)
245             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
246 	    col--;
247 #else
248             col = aij->colmap[in[j]] - 1;
249 #endif
250             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
251               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
252               col =  in[j];
253               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
254               B = aij->B;
255               b = (Mat_SeqAIJ *) B->data;
256               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
257               ba = b->a;
258             }
259           } else col = in[j];
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNC__
279 #define __FUNC__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
283   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
284   int        cstart = aij->cstart, cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for ( i=0; i<m; i++ ) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
289     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for ( j=0; j<n; j++ ) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
294         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNC__
322 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNC__
349 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   Scalar      *val;
356   InsertMode  addv = mat->insertmode;
357 
358   PetscFunctionBegin;
359   if (!aij->donotstash) {
360     while (1) {
361       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
362       if (!flg) break;
363 
364       for ( i=0; i<n; ) {
365         /* Now identify the consecutive vals belonging to the same row */
366         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
367         if (j < n) ncols = j-i;
368         else       ncols = n-i;
369         /* Now assemble all these values with a single function call */
370         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
371         i = j;
372       }
373     }
374     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
375   }
376 
377   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
378   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
379 
380   /* determine if any processor has disassembled, if so we must
381      also disassemble ourselfs, in order that we may reassemble. */
382   /*
383      if nonzero structure of submatrix B cannot change then we know that
384      no processor disassembled thus we can skip this stuff
385   */
386   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
387     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
388     if (mat->was_assembled && !other_disassembled) {
389       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNC__
407 #define __FUNC__ "MatZeroEntries_MPIAIJ"
408 int MatZeroEntries_MPIAIJ(Mat A)
409 {
410   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
411   int        ierr;
412 
413   PetscFunctionBegin;
414   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
415   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ "MatZeroRows_MPIAIJ"
421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
424   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
425   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
426   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429   MPI_Comm       comm = A->comm;
430   MPI_Request    *send_waits,*recv_waits;
431   MPI_Status     recv_status,*send_status;
432   IS             istmp;
433 
434   PetscFunctionBegin;
435   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   procs  = nprocs + size;
442   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
443   for ( i=0; i<N; i++ ) {
444     idx = rows[i];
445     found = 0;
446     for ( j=0; j<size; j++ ) {
447       if (idx >= owners[j] && idx < owners[j+1]) {
448         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
449       }
450     }
451     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
452   }
453   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
454 
455   /* inform other processors of number of messages and max length*/
456   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
457   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
458   nrecvs = work[rank];
459   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
460   nmax = work[rank];
461   ierr = PetscFree(work);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
465   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
466   for ( i=0; i<nrecvs; i++ ) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
475   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
476   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
477   starts[0] = 0;
478   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
479   for ( i=0; i<N; i++ ) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ISRestoreIndices(is,&rows);
483 
484   starts[0] = 0;
485   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
486   count = 0;
487   for ( i=0; i<size; i++ ) {
488     if (procs[i]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]  = n;
506     slen += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* move the data into the send scatter */
512   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
513   count = 0;
514   for ( i=0; i<nrecvs; i++ ) {
515     values = rvalues + i*nmax;
516     for ( j=0; j<lens[i]; j++ ) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal we use seperate
532      code so that MatSetValues() is not called for each diagonal allocating
533      new memory, thus calling lots of mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for ( i = 0; i < slen; i++ ) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
562     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNC__
572 #define __FUNC__ "MatMult_MPIAIJ"
573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
576   int        ierr,nt;
577 
578   PetscFunctionBegin;
579   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
580   if (nt != a->n) {
581     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
582   }
583   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
585   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
586   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatMultAdd_MPIAIJ"
592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
595   int        ierr;
596 
597   PetscFunctionBegin;
598   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
600   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNC__
606 #define __FUNC__ "MatMultTrans_MPIAIJ"
607 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
608 {
609   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
610   int        ierr;
611 
612   PetscFunctionBegin;
613   /* do nondiagonal part */
614   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
615   /* send it on its way */
616   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   /* do local part */
618   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
619   /* receive remote parts: note this assumes the values are not actually */
620   /* inserted in yy until the next line, which is true for my implementation*/
621   /* but is not perhaps always true. */
622   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNC__
627 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
628 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
629 {
630   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
631   int        ierr;
632 
633   PetscFunctionBegin;
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   /* send it on its way */
637   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638   /* do local part */
639   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
640   /* receive remote parts: note this assumes the values are not actually */
641   /* inserted in yy until the next line, which is true for my implementation*/
642   /* but is not perhaps always true. */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNC__
652 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   int        ierr;
656   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
657 
658   PetscFunctionBegin;
659   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNC__
668 #define __FUNC__ "MatScale_MPIAIJ"
669 int MatScale_MPIAIJ(Scalar *aa,Mat A)
670 {
671   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
672   int        ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
676   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNC__
681 #define __FUNC__ "MatDestroy_MPIAIJ"
682 int MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688 
689   if (mat->mapping) {
690     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
691   }
692   if (mat->bmapping) {
693     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
694   }
695   if (mat->rmap) {
696     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
697   }
698   if (mat->cmap) {
699     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
700   }
701 #if defined(PETSC_USE_LOG)
702   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
703 #endif
704   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
705   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
706   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
708 #if defined (PETSC_USE_CTABLE)
709   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
710 #else
711   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
712 #endif
713   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
714   if (aij->lvec)   VecDestroy(aij->lvec);
715   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
716   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
717   ierr = PetscFree(aij);CHKERRQ(ierr);
718   PLogObjectDestroy(mat);
719   PetscHeaderDestroy(mat);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNC__
724 #define __FUNC__ "MatView_MPIAIJ_Binary"
725 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
726 {
727   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
728   int         ierr;
729 
730   PetscFunctionBegin;
731   if (aij->size == 1) {
732     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
733   }
734   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNC__
739 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
740 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
741 {
742   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
743   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
744   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
745   int         size = aij->size;
746   int         isdraw,isascii;
747   FILE        *fd;
748 
749   PetscFunctionBegin;
750   isdraw  = PetscTypeCompare(viewer,DRAW_VIEWER);
751   isascii = PetscTypeCompare(viewer,ASCII_VIEWER);
752   if (isascii) {
753     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
754     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
755       MatInfo info;
756       int     flg;
757       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
758       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
759       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
760       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
761       ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
762       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
763          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
764       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
765          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
766       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
767       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
768       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
769       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
770       fflush(fd);
771       ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
772       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
773       PetscFunctionReturn(0);
774     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
775       PetscFunctionReturn(0);
776     }
777   } else if (isdraw) {
778     Draw       draw;
779     PetscTruth isnull;
780     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
781     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
782   }
783 
784   if (size == 1) {
785     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
786   } else {
787     /* assemble the entire matrix onto first processor. */
788     Mat         A;
789     Mat_SeqAIJ *Aloc;
790     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
791     Scalar      *a;
792 
793     if (!rank) {
794       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
795     } else {
796       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
797     }
798     PLogObjectParent(mat,A);
799 
800     /* copy over the A part */
801     Aloc = (Mat_SeqAIJ*) aij->A->data;
802     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
803     row = aij->rstart;
804     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
805     for ( i=0; i<m; i++ ) {
806       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
807       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
808     }
809     aj = Aloc->j;
810     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
811 
812     /* copy over the B part */
813     Aloc = (Mat_SeqAIJ*) aij->B->data;
814     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
815     row = aij->rstart;
816     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
817     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
818     for ( i=0; i<m; i++ ) {
819       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
820       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
821     }
822     ierr = PetscFree(ct);CHKERRQ(ierr);
823     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
825     /*
826        Everyone has to call to draw the matrix since the graphics waits are
827        synchronized across all processors that share the Draw object
828     */
829     if (!rank || isdraw) {
830       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
831     }
832     ierr = MatDestroy(A);CHKERRQ(ierr);
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNC__
838 #define __FUNC__ "MatView_MPIAIJ"
839 int MatView_MPIAIJ(Mat mat,Viewer viewer)
840 {
841   int         ierr;
842   int         isascii,isdraw,issocket,isbinary;
843 
844   PetscFunctionBegin;
845   isascii  = PetscTypeCompare(viewer,ASCII_VIEWER);
846   isdraw   = PetscTypeCompare(viewer,DRAW_VIEWER);
847   isbinary = PetscTypeCompare(viewer,BINARY_VIEWER);
848   issocket = PetscTypeCompare(viewer,SOCKET_VIEWER);
849   if (isascii || isdraw || isbinary || issocket) {
850     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
851   } else {
852     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
853   }
854   PetscFunctionReturn(0);
855 }
856 
857 /*
858     This has to provide several versions.
859 
860      2) a) use only local smoothing updating outer values only once.
861         b) local smoothing updating outer values each inner iteration
862      3) color updating out values betwen colors.
863 */
864 #undef __FUNC__
865 #define __FUNC__ "MatRelax_MPIAIJ"
866 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
867                            double fshift,int its,Vec xx)
868 {
869   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
870   Mat        AA = mat->A, BB = mat->B;
871   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
872   Scalar     *b,*x,*xs,*ls,d,*v,sum;
873   int        ierr,*idx, *diag;
874   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
875 
876   PetscFunctionBegin;
877   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
878   diag = A->diag;
879   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
880     if (flag & SOR_ZERO_INITIAL_GUESS) {
881       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
882       PetscFunctionReturn(0);
883     }
884     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
885     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
886     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
887     if (xx != bb) {
888       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
889     } else {
890       b = x;
891     }
892     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
893     xs = x + shift; /* shift by one for index start of 1 */
894     ls = ls + shift;
895     while (its--) {
896       /* go down through the rows */
897       for ( i=0; i<m; i++ ) {
898         n    = A->i[i+1] - A->i[i];
899 	PLogFlops(4*n+3);
900         idx  = A->j + A->i[i] + shift;
901         v    = A->a + A->i[i] + shift;
902         sum  = b[i];
903         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904         d    = fshift + A->a[diag[i]+shift];
905         n    = B->i[i+1] - B->i[i];
906         idx  = B->j + B->i[i] + shift;
907         v    = B->a + B->i[i] + shift;
908         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
910       }
911       /* come up through the rows */
912       for ( i=m-1; i>-1; i-- ) {
913         n    = A->i[i+1] - A->i[i];
914 	PLogFlops(4*n+3)
915         idx  = A->j + A->i[i] + shift;
916         v    = A->a + A->i[i] + shift;
917         sum  = b[i];
918         SPARSEDENSEMDOT(sum,xs,v,idx,n);
919         d    = fshift + A->a[diag[i]+shift];
920         n    = B->i[i+1] - B->i[i];
921         idx  = B->j + B->i[i] + shift;
922         v    = B->a + B->i[i] + shift;
923         SPARSEDENSEMDOT(sum,ls,v,idx,n);
924         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
925       }
926     }
927     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
928     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
929     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
930   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
931     if (flag & SOR_ZERO_INITIAL_GUESS) {
932       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
933       PetscFunctionReturn(0);
934     }
935     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
936     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
937     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
938     if (xx != bb) {
939       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
940     } else {
941       b = x;
942     }
943     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
944     xs = x + shift; /* shift by one for index start of 1 */
945     ls = ls + shift;
946     while (its--) {
947       for ( i=0; i<m; i++ ) {
948         n    = A->i[i+1] - A->i[i];
949 	PLogFlops(4*n+3);
950         idx  = A->j + A->i[i] + shift;
951         v    = A->a + A->i[i] + shift;
952         sum  = b[i];
953         SPARSEDENSEMDOT(sum,xs,v,idx,n);
954         d    = fshift + A->a[diag[i]+shift];
955         n    = B->i[i+1] - B->i[i];
956         idx  = B->j + B->i[i] + shift;
957         v    = B->a + B->i[i] + shift;
958         SPARSEDENSEMDOT(sum,ls,v,idx,n);
959         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
960       }
961     }
962     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
964     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
965   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
966     if (flag & SOR_ZERO_INITIAL_GUESS) {
967       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     }
970     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
971     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
972     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
973     if (xx != bb) {
974       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
975     } else {
976       b = x;
977     }
978     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
979     xs = x + shift; /* shift by one for index start of 1 */
980     ls = ls + shift;
981     while (its--) {
982       for ( i=m-1; i>-1; i-- ) {
983         n    = A->i[i+1] - A->i[i];
984 	PLogFlops(4*n+3);
985         idx  = A->j + A->i[i] + shift;
986         v    = A->a + A->i[i] + shift;
987         sum  = b[i];
988         SPARSEDENSEMDOT(sum,xs,v,idx,n);
989         d    = fshift + A->a[diag[i]+shift];
990         n    = B->i[i+1] - B->i[i];
991         idx  = B->j + B->i[i] + shift;
992         v    = B->a + B->i[i] + shift;
993         SPARSEDENSEMDOT(sum,ls,v,idx,n);
994         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
995       }
996     }
997     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
998     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
999     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1000   } else {
1001     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNC__
1007 #define __FUNC__ "MatGetInfo_MPIAIJ"
1008 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1009 {
1010   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1011   Mat        A = mat->A, B = mat->B;
1012   int        ierr;
1013   double     isend[5], irecv[5];
1014 
1015   PetscFunctionBegin;
1016   info->block_size     = 1.0;
1017   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1018   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1019   isend[3] = info->memory;  isend[4] = info->mallocs;
1020   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1021   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1022   isend[3] += info->memory;  isend[4] += info->mallocs;
1023   if (flag == MAT_LOCAL) {
1024     info->nz_used      = isend[0];
1025     info->nz_allocated = isend[1];
1026     info->nz_unneeded  = isend[2];
1027     info->memory       = isend[3];
1028     info->mallocs      = isend[4];
1029   } else if (flag == MAT_GLOBAL_MAX) {
1030     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1031     info->nz_used      = irecv[0];
1032     info->nz_allocated = irecv[1];
1033     info->nz_unneeded  = irecv[2];
1034     info->memory       = irecv[3];
1035     info->mallocs      = irecv[4];
1036   } else if (flag == MAT_GLOBAL_SUM) {
1037     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1038     info->nz_used      = irecv[0];
1039     info->nz_allocated = irecv[1];
1040     info->nz_unneeded  = irecv[2];
1041     info->memory       = irecv[3];
1042     info->mallocs      = irecv[4];
1043   }
1044   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1045   info->fill_ratio_needed = 0;
1046   info->factor_mallocs    = 0;
1047   info->rows_global       = (double)mat->M;
1048   info->columns_global    = (double)mat->N;
1049   info->rows_local        = (double)mat->m;
1050   info->columns_local     = (double)mat->N;
1051 
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNC__
1056 #define __FUNC__ "MatSetOption_MPIAIJ"
1057 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1058 {
1059   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1060   int        ierr;
1061 
1062   PetscFunctionBegin;
1063   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1064       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1065       op == MAT_COLUMNS_UNSORTED ||
1066       op == MAT_COLUMNS_SORTED ||
1067       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1068       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1069         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1070         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1071   } else if (op == MAT_ROW_ORIENTED) {
1072     a->roworiented = 1;
1073     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1074     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1075   } else if (op == MAT_ROWS_SORTED ||
1076              op == MAT_ROWS_UNSORTED ||
1077              op == MAT_SYMMETRIC ||
1078              op == MAT_STRUCTURALLY_SYMMETRIC ||
1079              op == MAT_YES_NEW_DIAGONALS)
1080     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1081   else if (op == MAT_COLUMN_ORIENTED) {
1082     a->roworiented = 0;
1083     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1084     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1085   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1086     a->donotstash = 1;
1087   } else if (op == MAT_NO_NEW_DIAGONALS){
1088     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1089   } else {
1090     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNC__
1096 #define __FUNC__ "MatGetSize_MPIAIJ"
1097 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1098 {
1099   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1100 
1101   PetscFunctionBegin;
1102   if (m) *m = mat->M;
1103   if (n) *n = mat->N;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNC__
1108 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1109 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1110 {
1111   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1112 
1113   PetscFunctionBegin;
1114   if (m) *m = mat->m;
1115   if (n) *n = mat->n;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1121 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1122 {
1123   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1124 
1125   PetscFunctionBegin;
1126   *m = mat->rstart; *n = mat->rend;
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNC__
1131 #define __FUNC__ "MatGetRow_MPIAIJ"
1132 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1133 {
1134   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1135   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1136   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1137   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1138   int        *cmap, *idx_p;
1139 
1140   PetscFunctionBegin;
1141   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1142   mat->getrowactive = PETSC_TRUE;
1143 
1144   if (!mat->rowvalues && (idx || v)) {
1145     /*
1146         allocate enough space to hold information from the longest row.
1147     */
1148     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1149     int     max = 1,m = mat->m,tmp;
1150     for ( i=0; i<m; i++ ) {
1151       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1152       if (max < tmp) { max = tmp; }
1153     }
1154     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1155     mat->rowindices = (int *) (mat->rowvalues + max);
1156   }
1157 
1158   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1159   lrow = row - rstart;
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for ( i=0; i<nzB; i++ ) {
1176           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1181         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for ( i=0; i<imark; i++ ) {
1187             idx_p[i] = cmap[cworkB[i]];
1188           }
1189         } else {
1190           for ( i=0; i<nzB; i++ ) {
1191             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1192             else break;
1193           }
1194           imark = i;
1195         }
1196         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1197         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1198       }
1199     } else {
1200       if (idx) *idx = 0;
1201       if (v)   *v   = 0;
1202     }
1203   }
1204   *nz = nztot;
1205   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1206   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1212 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1213 {
1214   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1215 
1216   PetscFunctionBegin;
1217   if (aij->getrowactive == PETSC_FALSE) {
1218     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1219   }
1220   aij->getrowactive = PETSC_FALSE;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNC__
1225 #define __FUNC__ "MatNorm_MPIAIJ"
1226 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1227 {
1228   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1229   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1230   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1231   double     sum = 0.0;
1232   Scalar     *v;
1233 
1234   PetscFunctionBegin;
1235   if (aij->size == 1) {
1236     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1237   } else {
1238     if (type == NORM_FROBENIUS) {
1239       v = amat->a;
1240       for (i=0; i<amat->nz; i++ ) {
1241 #if defined(PETSC_USE_COMPLEX)
1242         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1243 #else
1244         sum += (*v)*(*v); v++;
1245 #endif
1246       }
1247       v = bmat->a;
1248       for (i=0; i<bmat->nz; i++ ) {
1249 #if defined(PETSC_USE_COMPLEX)
1250         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1251 #else
1252         sum += (*v)*(*v); v++;
1253 #endif
1254       }
1255       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1256       *norm = sqrt(*norm);
1257     } else if (type == NORM_1) { /* max column norm */
1258       double *tmp, *tmp2;
1259       int    *jj, *garray = aij->garray;
1260       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1261       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1262       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1263       *norm = 0.0;
1264       v = amat->a; jj = amat->j;
1265       for ( j=0; j<amat->nz; j++ ) {
1266         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1267       }
1268       v = bmat->a; jj = bmat->j;
1269       for ( j=0; j<bmat->nz; j++ ) {
1270         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1271       }
1272       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1273       for ( j=0; j<aij->N; j++ ) {
1274         if (tmp2[j] > *norm) *norm = tmp2[j];
1275       }
1276       ierr = PetscFree(tmp);CHKERRQ(ierr);
1277       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1278     } else if (type == NORM_INFINITY) { /* max row norm */
1279       double ntemp = 0.0;
1280       for ( j=0; j<amat->m; j++ ) {
1281         v = amat->a + amat->i[j] + shift;
1282         sum = 0.0;
1283         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1284           sum += PetscAbsScalar(*v); v++;
1285         }
1286         v = bmat->a + bmat->i[j] + shift;
1287         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1288           sum += PetscAbsScalar(*v); v++;
1289         }
1290         if (sum > ntemp) ntemp = sum;
1291       }
1292       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1293     } else {
1294       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1295     }
1296   }
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNC__
1301 #define __FUNC__ "MatTranspose_MPIAIJ"
1302 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1303 {
1304   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1305   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1306   int        ierr,shift = Aloc->indexshift;
1307   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1308   Mat        B;
1309   Scalar     *array;
1310 
1311   PetscFunctionBegin;
1312   if (matout == PETSC_NULL && M != N) {
1313     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1314   }
1315 
1316   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1317 
1318   /* copy over the A part */
1319   Aloc = (Mat_SeqAIJ*) a->A->data;
1320   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1321   row = a->rstart;
1322   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1323   for ( i=0; i<m; i++ ) {
1324     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1325     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1326   }
1327   aj = Aloc->j;
1328   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1329 
1330   /* copy over the B part */
1331   Aloc = (Mat_SeqAIJ*) a->B->data;
1332   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1333   row = a->rstart;
1334   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1335   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1336   for ( i=0; i<m; i++ ) {
1337     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1338     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1339   }
1340   ierr = PetscFree(ct);CHKERRQ(ierr);
1341   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1342   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1343   if (matout != PETSC_NULL) {
1344     *matout = B;
1345   } else {
1346     PetscOps       *Abops;
1347     struct _MatOps *Aops;
1348 
1349     /* This isn't really an in-place transpose .... but free data structures from a */
1350     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1351     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1352     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1353 #if defined (PETSC_USE_CTABLE)
1354     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1355 #else
1356     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1357 #endif
1358     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1359     if (a->lvec) VecDestroy(a->lvec);
1360     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1361     ierr = PetscFree(a);CHKERRQ(ierr);
1362 
1363     /*
1364        This is horrible, horrible code. We need to keep the
1365       A pointers for the bops and ops but copy everything
1366       else from C.
1367     */
1368     Abops   = A->bops;
1369     Aops    = A->ops;
1370     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1371     A->bops = Abops;
1372     A->ops  = Aops;
1373     PetscHeaderDestroy(B);
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNC__
1379 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1380 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1381 {
1382   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1383   Mat        a = aij->A, b = aij->B;
1384   int        ierr,s1,s2,s3;
1385 
1386   PetscFunctionBegin;
1387   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1388   if (rr) {
1389     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1390     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1391     /* Overlap communication with computation. */
1392     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1393   }
1394   if (ll) {
1395     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1396     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1397     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1398   }
1399   /* scale  the diagonal block */
1400   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1401 
1402   if (rr) {
1403     /* Do a scatter end and then right scale the off-diagonal block */
1404     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1405     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1406   }
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 
1412 #undef __FUNC__
1413 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1414 int MatPrintHelp_MPIAIJ(Mat A)
1415 {
1416   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1417   int        ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNC__
1427 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1428 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1429 {
1430   PetscFunctionBegin;
1431   *bs = 1;
1432   PetscFunctionReturn(0);
1433 }
1434 #undef __FUNC__
1435 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1436 int MatSetUnfactored_MPIAIJ(Mat A)
1437 {
1438   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1439   int        ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNC__
1447 #define __FUNC__ "MatEqual_MPIAIJ"
1448 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1449 {
1450   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1451   Mat        a, b, c, d;
1452   PetscTruth flg;
1453   int        ierr;
1454 
1455   PetscFunctionBegin;
1456   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNC__
1469 #define __FUNC__ "MatCopy_MPIAIJ"
1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471 {
1472   int        ierr;
1473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475 
1476   PetscFunctionBegin;
1477   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1478     /* because of the column compression in the off-processor part of the matrix a->B,
1479        the number of columns in a->B and b->B may be different, hence we cannot call
1480        the MatCopy() directly on the two parts. If need be, we can provide a more
1481        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1482        then copying the submatrices */
1483     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1484   } else {
1485     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1486     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1487   }
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1492 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1493 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1494 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1495 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1496 
1497 /* -------------------------------------------------------------------*/
1498 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1499        MatGetRow_MPIAIJ,
1500        MatRestoreRow_MPIAIJ,
1501        MatMult_MPIAIJ,
1502        MatMultAdd_MPIAIJ,
1503        MatMultTrans_MPIAIJ,
1504        MatMultTransAdd_MPIAIJ,
1505        0,
1506        0,
1507        0,
1508        0,
1509        0,
1510        0,
1511        MatRelax_MPIAIJ,
1512        MatTranspose_MPIAIJ,
1513        MatGetInfo_MPIAIJ,
1514        MatEqual_MPIAIJ,
1515        MatGetDiagonal_MPIAIJ,
1516        MatDiagonalScale_MPIAIJ,
1517        MatNorm_MPIAIJ,
1518        MatAssemblyBegin_MPIAIJ,
1519        MatAssemblyEnd_MPIAIJ,
1520        0,
1521        MatSetOption_MPIAIJ,
1522        MatZeroEntries_MPIAIJ,
1523        MatZeroRows_MPIAIJ,
1524        0,
1525        0,
1526        0,
1527        0,
1528        MatGetSize_MPIAIJ,
1529        MatGetLocalSize_MPIAIJ,
1530        MatGetOwnershipRange_MPIAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535        MatDuplicate_MPIAIJ,
1536        0,
1537        0,
1538        0,
1539        0,
1540        0,
1541        MatGetSubMatrices_MPIAIJ,
1542        MatIncreaseOverlap_MPIAIJ,
1543        MatGetValues_MPIAIJ,
1544        MatCopy_MPIAIJ,
1545        MatPrintHelp_MPIAIJ,
1546        MatScale_MPIAIJ,
1547        0,
1548        0,
1549        0,
1550        MatGetBlockSize_MPIAIJ,
1551        0,
1552        0,
1553        0,
1554        0,
1555        MatFDColoringCreate_MPIAIJ,
1556        0,
1557        MatSetUnfactored_MPIAIJ,
1558        0,
1559        0,
1560        MatGetSubMatrix_MPIAIJ,
1561        0,
1562        0,
1563        MatGetMaps_Petsc};
1564 
1565 /* ----------------------------------------------------------------------------------------*/
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNC__
1569 #define __FUNC__ "MatStoreValues_MPIAIJ"
1570 int MatStoreValues_MPIAIJ(Mat mat)
1571 {
1572   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1573   int        ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1577   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 EXTERN_C_END
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNC__
1584 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1585 int MatRetrieveValues_MPIAIJ(Mat mat)
1586 {
1587   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1588   int        ierr;
1589 
1590   PetscFunctionBegin;
1591   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1592   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 EXTERN_C_END
1596 
1597 #include "pc.h"
1598 EXTERN_C_BEGIN
1599 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1600 EXTERN_C_END
1601 
1602 #undef __FUNC__
1603 #define __FUNC__ "MatCreateMPIAIJ"
1604 /*@C
1605    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1606    (the default parallel PETSc format).  For good matrix assembly performance
1607    the user should preallocate the matrix storage by setting the parameters
1608    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1609    performance can be increased by more than a factor of 50.
1610 
1611    Collective on MPI_Comm
1612 
1613    Input Parameters:
1614 +  comm - MPI communicator
1615 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1616            This value should be the same as the local size used in creating the
1617            y vector for the matrix-vector product y = Ax.
1618 .  n - This value should be the same as the local size used in creating the
1619        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1620        calculated if N is given) For square matrices n is almost always m.
1621 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1622 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1623 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1624            (same value is used for all local rows)
1625 .  d_nnz - array containing the number of nonzeros in the various rows of the
1626            DIAGONAL portion of the local submatrix (possibly different for each row)
1627            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1628            The size of this array is equal to the number of local rows, i.e 'm'.
1629            You must leave room for the diagonal entry even if it is zero.
1630 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1631            submatrix (same value is used for all local rows).
1632 -  o_nnz - array containing the number of nonzeros in the various rows of the
1633            OFF-DIAGONAL portion of the local submatrix (possibly different for
1634            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1635            structure. The size of this array is equal to the number
1636            of local rows, i.e 'm'.
1637 
1638    Output Parameter:
1639 .  A - the matrix
1640 
1641    Notes:
1642    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1643    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1644    storage requirements for this matrix.
1645 
1646    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1647    processor than it must be used on all processors that share the object for
1648    that argument.
1649 
1650    The AIJ format (also called the Yale sparse matrix format or
1651    compressed row storage), is fully compatible with standard Fortran 77
1652    storage.  That is, the stored row and column indices can begin at
1653    either one (as in Fortran) or zero.  See the users manual for details.
1654 
1655    The user MUST specify either the local or global matrix dimensions
1656    (possibly both).
1657 
1658    The parallel matrix is partitioned such that the first m0 rows belong to
1659    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1660    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1661 
1662    The DIAGONAL portion of the local submatrix of a processor can be defined
1663    as the submatrix which is obtained by extraction the part corresponding
1664    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1665    first row that belongs to the processor, and r2 is the last row belonging
1666    to the this processor. This is a square mxm matrix. The remaining portion
1667    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1668 
1669    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1670 
1671    By default, this format uses inodes (identical nodes) when possible.
1672    We search for consecutive rows with the same nonzero structure, thereby
1673    reusing matrix information to achieve increased efficiency.
1674 
1675    Options Database Keys:
1676 +  -mat_aij_no_inode  - Do not use inodes
1677 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1678 -  -mat_aij_oneindex - Internally use indexing starting at 1
1679         rather than 0.  Note that when calling MatSetValues(),
1680         the user still MUST index entries starting at 0!
1681 .   -mat_mpi - use the parallel matrix data structures even on one processor
1682                (defaults to using SeqBAIJ format on one processor)
1683 .   -mat_mpi - use the parallel matrix data structures even on one processor
1684                (defaults to using SeqAIJ format on one processor)
1685 
1686 
1687    Example usage:
1688 
1689    Consider the following 8x8 matrix with 34 non-zero values, that is
1690    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1691    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1692    as follows:
1693 
1694 .vb
1695             1  2  0  |  0  3  0  |  0  4
1696     Proc0   0  5  6  |  7  0  0  |  8  0
1697             9  0 10  | 11  0  0  | 12  0
1698     -------------------------------------
1699            13  0 14  | 15 16 17  |  0  0
1700     Proc1   0 18  0  | 19 20 21  |  0  0
1701             0  0  0  | 22 23  0  | 24  0
1702     -------------------------------------
1703     Proc2  25 26 27  |  0  0 28  | 29  0
1704            30  0  0  | 31 32 33  |  0 34
1705 .ve
1706 
1707    This can be represented as a collection of submatrices as:
1708 
1709 .vb
1710       A B C
1711       D E F
1712       G H I
1713 .ve
1714 
1715    Where the submatrices A,B,C are owned by proc0, D,E,F are
1716    owned by proc1, G,H,I are owned by proc2.
1717 
1718    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1719    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1720    The 'M','N' parameters are 8,8, and have the same values on all procs.
1721 
1722    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1723    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1724    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1725    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1726    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1727    matrix, ans [DF] as another SeqAIJ matrix.
1728 
1729    When d_nz, o_nz parameters are specified, d_nz storage elements are
1730    allocated for every row of the local diagonal submatrix, and o_nz
1731    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1732    One way to choose d_nz and o_nz is to use the max nonzerors per local
1733    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1734    In this case, the values of d_nz,o_nz are:
1735 .vb
1736      proc0 : dnz = 2, o_nz = 2
1737      proc1 : dnz = 3, o_nz = 2
1738      proc2 : dnz = 1, o_nz = 4
1739 .ve
1740    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1741    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1742    for proc3. i.e we are using 12+15+10=37 storage locations to store
1743    34 values.
1744 
1745    When d_nnz, o_nnz parameters are specified, the storage is specified
1746    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1747    In the above case the values for d_nnz,o_nnz are:
1748 .vb
1749      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1750      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1751      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1752 .ve
1753    Here the space allocated is sum of all the above values i.e 34, and
1754    hence pre-allocation is perfect.
1755 
1756    Level: intermediate
1757 
1758 .keywords: matrix, aij, compressed row, sparse, parallel
1759 
1760 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1761 @*/
1762 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1763 {
1764   Mat          B;
1765   Mat_MPIAIJ   *b;
1766   int          ierr, i,size,flag1 = 0, flag2 = 0;
1767 
1768   PetscFunctionBegin;
1769 
1770   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1771   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1772   if (d_nnz) {
1773     for (i=0; i<m; i++) {
1774       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1775     }
1776   }
1777   if (o_nnz) {
1778     for (i=0; i<m; i++) {
1779       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1780     }
1781   }
1782 
1783   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1784   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1785   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1786   if (!flag1 && !flag2 && size == 1) {
1787     if (M == PETSC_DECIDE) M = m;
1788     if (N == PETSC_DECIDE) N = n;
1789     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1790     PetscFunctionReturn(0);
1791   }
1792 
1793   *A = 0;
1794   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1795   PLogObjectCreate(B);
1796   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1797   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1798   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1799   B->ops->destroy = MatDestroy_MPIAIJ;
1800   B->ops->view    = MatView_MPIAIJ;
1801   B->factor       = 0;
1802   B->assembled    = PETSC_FALSE;
1803   B->mapping      = 0;
1804 
1805   B->insertmode      = NOT_SET_VALUES;
1806   b->size            = size;
1807   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1808 
1809   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1810     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1811   }
1812 
1813   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1814   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1815   b->m = m; B->m = m;
1816   b->n = n; B->n = n;
1817   b->N = N; B->N = N;
1818   b->M = M; B->M = M;
1819 
1820   /* the information in the maps duplicates the information computed below, eventually
1821      we should remove the duplicate information that is not contained in the maps */
1822   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1823   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1824 
1825   /* build local table of row and column ownerships */
1826   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1827   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1828   b->cowners = b->rowners + b->size + 2;
1829   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1830   b->rowners[0] = 0;
1831   for ( i=2; i<=b->size; i++ ) {
1832     b->rowners[i] += b->rowners[i-1];
1833   }
1834   b->rstart = b->rowners[b->rank];
1835   b->rend   = b->rowners[b->rank+1];
1836   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1837   b->cowners[0] = 0;
1838   for ( i=2; i<=b->size; i++ ) {
1839     b->cowners[i] += b->cowners[i-1];
1840   }
1841   b->cstart = b->cowners[b->rank];
1842   b->cend   = b->cowners[b->rank+1];
1843 
1844   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1845   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1846   PLogObjectParent(B,b->A);
1847   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1848   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1849   PLogObjectParent(B,b->B);
1850 
1851   /* build cache for off array entries formed */
1852   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1853   b->donotstash  = 0;
1854   b->colmap      = 0;
1855   b->garray      = 0;
1856   b->roworiented = 1;
1857 
1858   /* stuff used for matrix vector multiply */
1859   b->lvec      = 0;
1860   b->Mvctx     = 0;
1861 
1862   /* stuff for MatGetRow() */
1863   b->rowindices   = 0;
1864   b->rowvalues    = 0;
1865   b->getrowactive = PETSC_FALSE;
1866 
1867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1868                                      "MatStoreValues_MPIAIJ",
1869                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1870   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1871                                      "MatRetrieveValues_MPIAIJ",
1872                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1874                                      "MatGetDiagonalBlock_MPIAIJ",
1875                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1876   *A = B;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNC__
1881 #define __FUNC__ "MatDuplicate_MPIAIJ"
1882 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1883 {
1884   Mat        mat;
1885   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1886   int        ierr, len=0, flg;
1887 
1888   PetscFunctionBegin;
1889   *newmat       = 0;
1890   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1891   PLogObjectCreate(mat);
1892   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1893   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1894   mat->ops->destroy = MatDestroy_MPIAIJ;
1895   mat->ops->view    = MatView_MPIAIJ;
1896   mat->factor       = matin->factor;
1897   mat->assembled    = PETSC_TRUE;
1898   mat->insertmode   = NOT_SET_VALUES;
1899 
1900   a->m = mat->m   = oldmat->m;
1901   a->n = mat->n   = oldmat->n;
1902   a->M = mat->M   = oldmat->M;
1903   a->N = mat->N   = oldmat->N;
1904 
1905   a->rstart       = oldmat->rstart;
1906   a->rend         = oldmat->rend;
1907   a->cstart       = oldmat->cstart;
1908   a->cend         = oldmat->cend;
1909   a->size         = oldmat->size;
1910   a->rank         = oldmat->rank;
1911   a->donotstash   = oldmat->donotstash;
1912   a->roworiented  = oldmat->roworiented;
1913   a->rowindices   = 0;
1914   a->rowvalues    = 0;
1915   a->getrowactive = PETSC_FALSE;
1916 
1917   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1918   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1919   a->cowners = a->rowners + a->size + 2;
1920   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1921   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1922   if (oldmat->colmap) {
1923 #if defined (PETSC_USE_CTABLE)
1924     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1925 #else
1926     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1927     PLogObjectMemory(mat,(a->N)*sizeof(int));
1928     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1929 #endif
1930   } else a->colmap = 0;
1931   if (oldmat->garray) {
1932     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1933     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1934     PLogObjectMemory(mat,len*sizeof(int));
1935     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1936   } else a->garray = 0;
1937 
1938   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1939   PLogObjectParent(mat,a->lvec);
1940   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1941   PLogObjectParent(mat,a->Mvctx);
1942   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1943   PLogObjectParent(mat,a->A);
1944   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1945   PLogObjectParent(mat,a->B);
1946   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1947   if (flg) {
1948     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1949   }
1950   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1951   *newmat = mat;
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #include "sys.h"
1956 
1957 #undef __FUNC__
1958 #define __FUNC__ "MatLoad_MPIAIJ"
1959 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1960 {
1961   Mat          A;
1962   Scalar       *vals,*svals;
1963   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1964   MPI_Status   status;
1965   int          i, nz, ierr, j,rstart, rend, fd;
1966   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1967   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1968   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1969 
1970   PetscFunctionBegin;
1971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1972   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1973   if (!rank) {
1974     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1976     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1977     if (header[3] < 0) {
1978       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1979     }
1980   }
1981 
1982   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1983   M = header[1]; N = header[2];
1984   /* determine ownership of all rows */
1985   m = M/size + ((M % size) > rank);
1986   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1987   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1988   rowners[0] = 0;
1989   for ( i=2; i<=size; i++ ) {
1990     rowners[i] += rowners[i-1];
1991   }
1992   rstart = rowners[rank];
1993   rend   = rowners[rank+1];
1994 
1995   /* distribute row lengths to all processors */
1996   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1997   offlens = ourlens + (rend-rstart);
1998   if (!rank) {
1999     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
2000     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2001     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2002     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2003     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2004     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2005   } else {
2006     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2007   }
2008 
2009   if (!rank) {
2010     /* calculate the number of nonzeros on each processor */
2011     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2012     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2013     for ( i=0; i<size; i++ ) {
2014       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2015         procsnz[i] += rowlengths[j];
2016       }
2017     }
2018     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2019 
2020     /* determine max buffer needed and allocate it */
2021     maxnz = 0;
2022     for ( i=0; i<size; i++ ) {
2023       maxnz = PetscMax(maxnz,procsnz[i]);
2024     }
2025     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2026 
2027     /* read in my part of the matrix column indices  */
2028     nz     = procsnz[0];
2029     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2030     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2031 
2032     /* read in every one elses and ship off */
2033     for ( i=1; i<size; i++ ) {
2034       nz   = procsnz[i];
2035       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2036       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2037     }
2038     ierr = PetscFree(cols);CHKERRQ(ierr);
2039   } else {
2040     /* determine buffer space needed for message */
2041     nz = 0;
2042     for ( i=0; i<m; i++ ) {
2043       nz += ourlens[i];
2044     }
2045     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2046 
2047     /* receive message of column indices*/
2048     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2049     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2050     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2051   }
2052 
2053   /* determine column ownership if matrix is not square */
2054   if (N != M) {
2055     n      = N/size + ((N % size) > rank);
2056     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2057     cstart = cend - n;
2058   } else {
2059     cstart = rstart;
2060     cend   = rend;
2061     n      = cend - cstart;
2062   }
2063 
2064   /* loop over local rows, determining number of off diagonal entries */
2065   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2066   jj = 0;
2067   for ( i=0; i<m; i++ ) {
2068     for ( j=0; j<ourlens[i]; j++ ) {
2069       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2070       jj++;
2071     }
2072   }
2073 
2074   /* create our matrix */
2075   for ( i=0; i<m; i++ ) {
2076     ourlens[i] -= offlens[i];
2077   }
2078   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2079   A = *newmat;
2080   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2081   for ( i=0; i<m; i++ ) {
2082     ourlens[i] += offlens[i];
2083   }
2084 
2085   if (!rank) {
2086     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2087 
2088     /* read in my part of the matrix numerical values  */
2089     nz   = procsnz[0];
2090     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2091 
2092     /* insert into matrix */
2093     jj      = rstart;
2094     smycols = mycols;
2095     svals   = vals;
2096     for ( i=0; i<m; i++ ) {
2097       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2098       smycols += ourlens[i];
2099       svals   += ourlens[i];
2100       jj++;
2101     }
2102 
2103     /* read in other processors and ship out */
2104     for ( i=1; i<size; i++ ) {
2105       nz   = procsnz[i];
2106       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2107       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2108     }
2109     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2110   } else {
2111     /* receive numeric values */
2112     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2113 
2114     /* receive message of values*/
2115     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2116     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2117     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2118 
2119     /* insert into matrix */
2120     jj      = rstart;
2121     smycols = mycols;
2122     svals   = vals;
2123     for ( i=0; i<m; i++ ) {
2124       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2125       smycols += ourlens[i];
2126       svals   += ourlens[i];
2127       jj++;
2128     }
2129   }
2130   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2131   ierr = PetscFree(vals);CHKERRQ(ierr);
2132   ierr = PetscFree(mycols);CHKERRQ(ierr);
2133   ierr = PetscFree(rowners);CHKERRQ(ierr);
2134 
2135   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNC__
2141 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2142 /*
2143     Not great since it makes two copies of the submatrix, first an SeqAIJ
2144   in local and then by concatenating the local matrices the end result.
2145   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2146 */
2147 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2148 {
2149   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2150   Mat        *local,M, Mreuse;
2151   Scalar     *vwork,*aa;
2152   MPI_Comm   comm = mat->comm;
2153   Mat_SeqAIJ *aij;
2154   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2155 
2156   PetscFunctionBegin;
2157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2159 
2160   if (call ==  MAT_REUSE_MATRIX) {
2161     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2162     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2163     local = &Mreuse;
2164     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2165   } else {
2166     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2167     Mreuse = *local;
2168     ierr = PetscFree(local);CHKERRQ(ierr);
2169   }
2170 
2171   /*
2172       m - number of local rows
2173       n - number of columns (same on all processors)
2174       rstart - first row in new global matrix generated
2175   */
2176   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2177   if (call == MAT_INITIAL_MATRIX) {
2178     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2179     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2180     ii  = aij->i;
2181     jj  = aij->j;
2182 
2183     /*
2184         Determine the number of non-zeros in the diagonal and off-diagonal
2185         portions of the matrix in order to do correct preallocation
2186     */
2187 
2188     /* first get start and end of "diagonal" columns */
2189     if (csize == PETSC_DECIDE) {
2190       nlocal = n/size + ((n % size) > rank);
2191     } else {
2192       nlocal = csize;
2193     }
2194     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2195     rstart = rend - nlocal;
2196     if (rank == size - 1 && rend != n) {
2197       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2198     }
2199 
2200     /* next, compute all the lengths */
2201     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2202     olens = dlens + m;
2203     for ( i=0; i<m; i++ ) {
2204       jend = ii[i+1] - ii[i];
2205       olen = 0;
2206       dlen = 0;
2207       for ( j=0; j<jend; j++ ) {
2208         if ( *jj < rstart || *jj >= rend) olen++;
2209         else dlen++;
2210         jj++;
2211       }
2212       olens[i] = olen;
2213       dlens[i] = dlen;
2214     }
2215     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2216     ierr = PetscFree(dlens);CHKERRQ(ierr);
2217   } else {
2218     int ml,nl;
2219 
2220     M = *newmat;
2221     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2222     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2223     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2224     /*
2225          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2226        rather than the slower MatSetValues().
2227     */
2228     M->was_assembled = PETSC_TRUE;
2229     M->assembled     = PETSC_FALSE;
2230   }
2231   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2232   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2233   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2234   ii  = aij->i;
2235   jj  = aij->j;
2236   aa  = aij->a;
2237   for (i=0; i<m; i++) {
2238     row   = rstart + i;
2239     nz    = ii[i+1] - ii[i];
2240     cwork = jj;     jj += nz;
2241     vwork = aa;     aa += nz;
2242     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2243   }
2244 
2245   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247   *newmat = M;
2248 
2249   /* save submatrix used in processor for next request */
2250   if (call ==  MAT_INITIAL_MATRIX) {
2251     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2252     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2253   }
2254 
2255   PetscFunctionReturn(0);
2256 }
2257