#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.295 1999/06/08 22:55:49 balay Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of columns.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
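
/*
   A minimal sketch, not part of the original source, of how the colmap
   built above is consumed when a value lands in the off-diagonal block B:
   a global column number gcol is translated into the local column number
   lcol in B.  The names gcol and lcol are illustrative only; both branches
   mirror the lookups done in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below.

   #if defined (PETSC_USE_CTABLE)
     ierr = TableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                          entries are stored shifted by one,
                                      so that zero can mean "not present"
   #else
     lcol = aij->colmap[gcol] - 1;    yields -1 when gcol is absent from B
   #endif
*/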

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}
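
/*
   Note on the insertion logic above (and in the B variant that follows):
   the binary search narrows the interval [low,high) that could contain
   col, after which a linear scan (conservatively started from the
   beginning of the row) locates the exact slot.  If the row is already
   full (nrow >= rmax), the storage for the whole local matrix is grown by
   CHUNKSIZE entries; for example, with CHUNKSIZE = 15 a row that has
   outgrown an allocation of 15 is reallocated to hold 30, and every row
   offset ai[ii] past the current row is shifted up by 15.
*/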

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
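
/*
   A minimal usage sketch, not part of the original source: values set
   through the routine above reach one of three destinations -- the
   diagonal block A, the off-diagonal block B, or the stash for rows owned
   by another processor.  The loop below (the names rstart, rend, i, one
   are illustrative) inserts locally owned diagonal entries, which all
   land in aij->A; a row outside [rstart,rend) would instead be stashed
   and routed to its owner during assembly.

     int    rstart,rend,i;
     Scalar one = 1.0;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for ( i=rstart; i<rend; i++ ) {
       ierr = MatSetValues(mat,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/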

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then no
     processor can have disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time, triggering many mallocs
     and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
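
/*
   A hedged usage sketch (not part of the original source): zeroing a set
   of global rows and placing 1.0 on their diagonals.  The particular row
   list is illustrative; any processor may list any global rows, since the
   routine above routes each row to its owner.

     IS     is;
     int    rows[2] = {0,5};
     Scalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/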

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
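
/*
   A minimal sketch, assuming (this is an assumption, not taken from this
   file) that conforming vectors are built with VecCreateMPI using the
   matrix's local sizes:

     Vec x,y;
     ierr = VecCreateMPI(A->comm,a->n,PETSC_DECIDE,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(A->comm,a->m,PETSC_DECIDE,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);            y = A*x

   The scatter begun above overlaps communication of the off-processor
   entries of x with the product against the diagonal block A.
*/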

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this          */
  /* implementation, but may not always be the case                      */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this          */
  /* implementation, but may not always be the case                      */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*) aij->A->data;
  int         ierr,format,shift = C->indexshift,rank = aij->rank;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int         ierr;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
    /*
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    */
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
  ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->n;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}
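
/*
   A small usage sketch (not part of the original source): a code that
   only ever sets locally owned rows can switch off the stash entirely,
   so the assembly routines above exchange no messages:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/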

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
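
/*
   A hedged usage sketch (not part of the original source): traversing the
   locally owned rows with the routine above.  Only local rows may be
   requested, and each MatGetRow() must be matched by a MatRestoreRow()
   before the next row is requested.

     int    rstart,rend,row,nz,*cols;
     Scalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz entries of cols[] and vals[] ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/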

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but we free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    ierr = MatDestroy(a->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1378 
1379 
1380 #undef __FUNC__
1381 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1382 int MatPrintHelp_MPIAIJ(Mat A)
1383 {
1384   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1385   int        ierr;
1386 
1387   PetscFunctionBegin;
1388   if (!a->rank) {
1389     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNC__
1395 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1396 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1397 {
1398   PetscFunctionBegin;
1399   *bs = 1;
1400   PetscFunctionReturn(0);
1401 }
1402 #undef __FUNC__
1403 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1404 int MatSetUnfactored_MPIAIJ(Mat A)
1405 {
1406   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1407   int        ierr;
1408 
1409   PetscFunctionBegin;
1410   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNC__
1415 #define __FUNC__ "MatEqual_MPIAIJ"
1416 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1417 {
1418   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1419   Mat        a, b, c, d;
1420   PetscTruth flg;
1421   int        ierr;
1422 
1423   PetscFunctionBegin;
1424   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1425   a = matA->A; b = matA->B;
1426   c = matB->A; d = matB->B;
1427 
1428   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1429   if (flg == PETSC_TRUE) {
1430     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1431   }
1432   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNC__
1437 #define __FUNC__ "MatCopy_MPIAIJ"
1438 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1439 {
1440   int        ierr;
1441   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1442   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1443 
1444   PetscFunctionBegin;
1445   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1446     /* because of the column compression in the off-processor part of the matrix a->B,
1447        the number of columns in a->B and b->B may be different, hence we cannot call
1448        MatCopy() directly on the two parts. If need be, we could provide a copy more
1449        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1450        then copying the submatrices */
1451     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1452   } else {
1453     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1454     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
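
/*
   Illustrative caller sketch: when both matrices are MPIAIJ and were
   preallocated identically, passing SAME_NONZERO_PATTERN takes the fast
   per-block path above instead of MatCopy_Basic().

      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/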
1458 
1459 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1460 extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1461 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1462 extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1463 extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1464 
1465 /* -------------------------------------------------------------------*/
1466 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1467        MatGetRow_MPIAIJ,
1468        MatRestoreRow_MPIAIJ,
1469        MatMult_MPIAIJ,
1470        MatMultAdd_MPIAIJ,
1471        MatMultTrans_MPIAIJ,
1472        MatMultTransAdd_MPIAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477        0,
1478        0,
1479        MatRelax_MPIAIJ,
1480        MatTranspose_MPIAIJ,
1481        MatGetInfo_MPIAIJ,
1482        MatEqual_MPIAIJ,
1483        MatGetDiagonal_MPIAIJ,
1484        MatDiagonalScale_MPIAIJ,
1485        MatNorm_MPIAIJ,
1486        MatAssemblyBegin_MPIAIJ,
1487        MatAssemblyEnd_MPIAIJ,
1488        0,
1489        MatSetOption_MPIAIJ,
1490        MatZeroEntries_MPIAIJ,
1491        MatZeroRows_MPIAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496        MatGetSize_MPIAIJ,
1497        MatGetLocalSize_MPIAIJ,
1498        MatGetOwnershipRange_MPIAIJ,
1499        0,
1500        0,
1501        0,
1502        0,
1503        MatDuplicate_MPIAIJ,
1504        0,
1505        0,
1506        0,
1507        0,
1508        0,
1509        MatGetSubMatrices_MPIAIJ,
1510        MatIncreaseOverlap_MPIAIJ,
1511        MatGetValues_MPIAIJ,
1512        MatCopy_MPIAIJ,
1513        MatPrintHelp_MPIAIJ,
1514        MatScale_MPIAIJ,
1515        0,
1516        0,
1517        0,
1518        MatGetBlockSize_MPIAIJ,
1519        0,
1520        0,
1521        0,
1522        0,
1523        MatFDColoringCreate_MPIAIJ,
1524        0,
1525        MatSetUnfactored_MPIAIJ,
1526        0,
1527        0,
1528        MatGetSubMatrix_MPIAIJ,
1529        0,
1530        0,
1531        MatGetMaps_Petsc};
1532 
1533 /* ----------------------------------------------------------------------------------------*/
1534 
1535 EXTERN_C_BEGIN
1536 #undef __FUNC__
1537 #define __FUNC__ "MatStoreValues_MPIAIJ"
1538 int MatStoreValues_MPIAIJ(Mat mat)
1539 {
1540   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1541   int        ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1545   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 EXTERN_C_END
1549 
1550 EXTERN_C_BEGIN
1551 #undef __FUNC__
1552 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1553 int MatRetrieveValues_MPIAIJ(Mat mat)
1554 {
1555   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1556   int        ierr;
1557 
1558   PetscFunctionBegin;
1559   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1560   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 EXTERN_C_END
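
/*
   Typical use of the two routines above (illustrative sketch): freeze the
   nonzero pattern, save the numerical values, and restore them later without
   reassembling; MatStoreValues() requires that MAT_NO_NEW_NONZERO_LOCATIONS
   be set on the matrix.

      ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ... modify values with MatSetValues() and reassemble ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/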
1564 
1565 #include "pc.h"
1566 EXTERN_C_BEGIN
1567 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1568 EXTERN_C_END
1569 
1570 #undef __FUNC__
1571 #define __FUNC__ "MatCreateMPIAIJ"
1572 /*@C
1573    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1574    (the default parallel PETSc format).  For good matrix assembly performance
1575    the user should preallocate the matrix storage by setting the parameters
1576    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1577    performance can be increased by more than a factor of 50.
1578 
1579    Collective on MPI_Comm
1580 
1581    Input Parameters:
1582 +  comm - MPI communicator
1583 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1584            This value should be the same as the local size used in creating the
1585            y vector for the matrix-vector product y = Ax.
1586 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1587        This value should be the same as the local size used in creating the
1588        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1589 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1590 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1591 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1592            (same value is used for all local rows)
1593 .  d_nnz - array containing the number of nonzeros in the various rows of the
1594            DIAGONAL portion of the local submatrix (possibly different for each row)
1595            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1596            The size of this array is equal to the number of local rows, i.e., 'm'.
1597            You must leave room for the diagonal entry even if it is zero.
1598 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1599            submatrix (same value is used for all local rows).
1600 -  o_nnz - array containing the number of nonzeros in the various rows of the
1601            OFF-DIAGONAL portion of the local submatrix (possibly different for
1602            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1603            structure. The size of this array is equal to the number
1604            of local rows, i.e., 'm'.
1605 
1606    Output Parameter:
1607 .  A - the matrix
1608 
1609    Notes:
1610    The m,n,M,N parameters specify the size of the matrix and its partitioning
1611    across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
1612    approximate storage requirements for this matrix.
1613 
1614    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1615    processor then it must be used on all processors that share the object for
1616    that argument.
1617 
1618    The AIJ format (also called the Yale sparse matrix format or
1619    compressed row storage) is fully compatible with standard Fortran 77
1620    storage.  That is, the stored row and column indices can begin at
1621    either one (as in Fortran) or zero.  See the users manual for details.
1622 
1623    The user MUST specify either the local or global matrix dimensions
1624    (possibly both).
1625 
1626    The parallel matrix is partitioned such that the first m0 rows belong to
1627    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1628    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
1629 
1630    The DIAGONAL portion of the local submatrix of a processor can be defined
1631    as the submatrix which is obtained by extracting the part corresponding
1632    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1633    first row that belongs to the processor, and r2 is the last row belonging
1634    to this processor. This is a square mxm matrix. The remaining portion
1635    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1636 
1637    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1638 
1639    By default, this format uses inodes (identical nodes) when possible.
1640    We search for consecutive rows with the same nonzero structure, thereby
1641    reusing matrix information to achieve increased efficiency.
1642 
1643    Options Database Keys:
1644 +  -mat_aij_no_inode  - Do not use inodes
1645 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1646 .  -mat_aij_oneindex - Internally use indexing starting at 1
1647         rather than 0.  Note that when calling MatSetValues(),
1648         the user still MUST index entries starting at 0!
1649 -  -mat_mpi - use the parallel matrix data structures even on one processor
1650                (defaults to using SeqAIJ format on one processor)
1653 
1654 
1655    Example usage:
1656 
1657    Consider the following 8x8 matrix with 34 non-zero values, which is
1658    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1659    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1660    as follows:
1661 
1662 .vb
1663             1  2  0  |  0  3  0  |  0  4
1664     Proc0   0  5  6  |  7  0  0  |  8  0
1665             9  0 10  | 11  0  0  | 12  0
1666     -------------------------------------
1667            13  0 14  | 15 16 17  |  0  0
1668     Proc1   0 18  0  | 19 20 21  |  0  0
1669             0  0  0  | 22 23  0  | 24  0
1670     -------------------------------------
1671     Proc2  25 26 27  |  0  0 28  | 29  0
1672            30  0  0  | 31 32 33  |  0 34
1673 .ve
1674 
1675    This can be represented as a collection of submatrices as:
1676 
1677 .vb
1678       A B C
1679       D E F
1680       G H I
1681 .ve
1682 
1683    where the submatrices A,B,C are owned by proc0, D,E,F are
1684    owned by proc1, G,H,I are owned by proc2.
1685 
1686    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1687    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1688    The 'M','N' parameters are 8,8, and have the same values on all procs.
1689 
1690    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1691    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1692    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1693    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
1694    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
1695    matrix and [DF] as another SeqAIJ matrix.
1696 
1697    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1698    allocated for every row of the local DIAGONAL submatrix, and o_nz
1699    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1700    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
1701    over the local rows of each of the local DIAGONAL and OFF-DIAGONAL
1702    submatrices. In this case, the values of d_nz,o_nz are:
1703 .vb
1704      proc0 : d_nz = 2, o_nz = 2
1705      proc1 : d_nz = 3, o_nz = 2
1706      proc2 : d_nz = 1, o_nz = 4
1707 .ve
1708    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1709    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1710    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1711    34 values.
1712 
1713    When d_nnz, o_nnz parameters are specified, the storage is specified
1714    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1715    In the above case the values for d_nnz,o_nnz are:
1716 .vb
1717      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1718      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1719      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1720 .ve
1721    Here the space allocated is the sum of all the above values, i.e., 34, and
1722    hence the preallocation is perfect.
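
   For instance, on proc0 the matrix above could be created with the
   following (illustrative) call; d_nz and o_nz are passed as 0 since the
   d_nnz and o_nnz arrays take precedence:
.vb
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve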
1723 
1724    Level: intermediate
1725 
1726 .keywords: matrix, aij, compressed row, sparse, parallel
1727 
1728 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1729 @*/
1730 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1731 {
1732   Mat          B;
1733   Mat_MPIAIJ   *b;
1734   int          ierr, i,size,flag1 = 0, flag2 = 0;
1735 
1736   PetscFunctionBegin;
1737   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1738   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1739   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1740   if (!flag1 && !flag2 && size == 1) {
1741     if (M == PETSC_DECIDE) M = m;
1742     if (N == PETSC_DECIDE) N = n;
1743     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1744     PetscFunctionReturn(0);
1745   }
1746 
1747   *A = 0;
1748   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1749   PLogObjectCreate(B);
1750   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1751   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1752   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1753   B->ops->destroy = MatDestroy_MPIAIJ;
1754   B->ops->view    = MatView_MPIAIJ;
1755   B->factor       = 0;
1756   B->assembled    = PETSC_FALSE;
1757   B->mapping      = 0;
1758 
1759   B->insertmode      = NOT_SET_VALUES;
1760   b->size            = size;
1761   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1762 
1763   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1764     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1765   }
1766 
1767   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1768   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1769   b->m = m; B->m = m;
1770   b->n = n; B->n = n;
1771   b->N = N; B->N = N;
1772   b->M = M; B->M = M;
1773 
1774   /* the information in the maps duplicates the information computed below, eventually
1775      we should remove the duplicate information that is not contained in the maps */
1776   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1777   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1778 
1779   /* build local table of row and column ownerships */
1780   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1781   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1782   b->cowners = b->rowners + b->size + 2;
1783   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1784   b->rowners[0] = 0;
1785   for ( i=2; i<=b->size; i++ ) {
1786     b->rowners[i] += b->rowners[i-1];
1787   }
1788   b->rstart = b->rowners[b->rank];
1789   b->rend   = b->rowners[b->rank+1];
1790   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1791   b->cowners[0] = 0;
1792   for ( i=2; i<=b->size; i++ ) {
1793     b->cowners[i] += b->cowners[i-1];
1794   }
1795   b->cstart = b->cowners[b->rank];
1796   b->cend   = b->cowners[b->rank+1];
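  /* e.g. with size=3 and local sizes m = 3,3,2 the prefix sums above give
     rowners = {0,3,6,8}, so rank 1 owns global rows [3,6) */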
1797 
1798   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1799   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1800   PLogObjectParent(B,b->A);
1801   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1802   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1803   PLogObjectParent(B,b->B);
1804 
1805   /* build cache for off array entries formed */
1806   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1807   b->donotstash  = 0;
1808   b->colmap      = 0;
1809   b->garray      = 0;
1810   b->roworiented = 1;
1811 
1812   /* stuff used for matrix vector multiply */
1813   b->lvec      = 0;
1814   b->Mvctx     = 0;
1815 
1816   /* stuff for MatGetRow() */
1817   b->rowindices   = 0;
1818   b->rowvalues    = 0;
1819   b->getrowactive = PETSC_FALSE;
1820 
1821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1822                                      "MatStoreValues_MPIAIJ",
1823                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1825                                      "MatRetrieveValues_MPIAIJ",
1826                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1828                                      "MatGetDiagonalBlock_MPIAIJ",
1829                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1830   *A = B;
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNC__
1835 #define __FUNC__ "MatDuplicate_MPIAIJ"
1836 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1837 {
1838   Mat        mat;
1839   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1840   int        ierr, len=0, flg;
1841 
1842   PetscFunctionBegin;
1843   *newmat       = 0;
1844   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1845   PLogObjectCreate(mat);
1846   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1847   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1848   mat->ops->destroy = MatDestroy_MPIAIJ;
1849   mat->ops->view    = MatView_MPIAIJ;
1850   mat->factor       = matin->factor;
1851   mat->assembled    = PETSC_TRUE;
1852   mat->insertmode   = NOT_SET_VALUES;
1853 
1854   a->m = mat->m   = oldmat->m;
1855   a->n = mat->n   = oldmat->n;
1856   a->M = mat->M   = oldmat->M;
1857   a->N = mat->N   = oldmat->N;
1858 
1859   a->rstart       = oldmat->rstart;
1860   a->rend         = oldmat->rend;
1861   a->cstart       = oldmat->cstart;
1862   a->cend         = oldmat->cend;
1863   a->size         = oldmat->size;
1864   a->rank         = oldmat->rank;
1865   a->donotstash   = oldmat->donotstash;
1866   a->roworiented  = oldmat->roworiented;
1867   a->rowindices   = 0;
1868   a->rowvalues    = 0;
1869   a->getrowactive = PETSC_FALSE;
1870 
1871   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1872   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1873   a->cowners = a->rowners + a->size + 2;
1874   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1875   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1876   if (oldmat->colmap) {
1877 #if defined (PETSC_USE_CTABLE)
1878     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1879 #else
1880     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1881     PLogObjectMemory(mat,(a->N)*sizeof(int));
1882     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1883 #endif
1884   } else a->colmap = 0;
1885   if (oldmat->garray) {
1886     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1887     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1888     PLogObjectMemory(mat,len*sizeof(int));
1889     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1890   } else a->garray = 0;
1891 
1892   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1893   PLogObjectParent(mat,a->lvec);
1894   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1895   PLogObjectParent(mat,a->Mvctx);
1896   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1897   PLogObjectParent(mat,a->A);
1898   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1899   PLogObjectParent(mat,a->B);
1900   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1901   if (flg) {
1902     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1903   }
1904   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1905   *newmat = mat;
1906   PetscFunctionReturn(0);
1907 }
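
/*
   Illustrative caller sketch: duplicate a parallel matrix, including its
   numerical values.

      Mat C;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
*/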
1908 
1909 #include "sys.h"
1910 
1911 #undef __FUNC__
1912 #define __FUNC__ "MatLoad_MPIAIJ"
1913 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1914 {
1915   Mat          A;
1916   Scalar       *vals,*svals;
1917   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1918   MPI_Status   status;
1919   int          i, nz, ierr, j,rstart, rend, fd;
1920   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1921   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1922   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1923 
1924   PetscFunctionBegin;
1925   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1926   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1927   if (!rank) {
1928     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1929     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1930     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not a matrix object");
1931     if (header[3] < 0) {
1932       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1933     }
1934   }
1935 
1936   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1937   M = header[1]; N = header[2];
1938   /* determine ownership of all rows */
1939   m = M/size + ((M % size) > rank);
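  /* e.g. M=8, size=3 yields local sizes m = 3,3,2 on ranks 0,1,2 */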
1940   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1941   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1942   rowners[0] = 0;
1943   for ( i=2; i<=size; i++ ) {
1944     rowners[i] += rowners[i-1];
1945   }
1946   rstart = rowners[rank];
1947   rend   = rowners[rank+1];
1948 
1949   /* distribute row lengths to all processors */
1950   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1951   offlens = ourlens + (rend-rstart);
1952   if (!rank) {
1953     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1954     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1955     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1956     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1957     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1958     PetscFree(sndcounts);
1959   } else {
1960     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1961   }
1962 
1963   if (!rank) {
1964     /* calculate the number of nonzeros on each processor */
1965     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1966     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1967     for ( i=0; i<size; i++ ) {
1968       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1969         procsnz[i] += rowlengths[j];
1970       }
1971     }
1972     PetscFree(rowlengths);
1973 
1974     /* determine max buffer needed and allocate it */
1975     maxnz = 0;
1976     for ( i=0; i<size; i++ ) {
1977       maxnz = PetscMax(maxnz,procsnz[i]);
1978     }
1979     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1980 
1981     /* read in my part of the matrix column indices  */
1982     nz     = procsnz[0];
1983     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1984     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1985 
1986     /* read in everyone else's parts and ship them off */
1987     for ( i=1; i<size; i++ ) {
1988       nz   = procsnz[i];
1989       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1990       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1991     }
1992     PetscFree(cols);
1993   } else {
1994     /* determine buffer space needed for message */
1995     nz = 0;
1996     for ( i=0; i<m; i++ ) {
1997       nz += ourlens[i];
1998     }
1999     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2000 
2001     /* receive message of column indices */
2002     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2003     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2004     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2005   }
2006 
2007   /* determine column ownership if matrix is not square */
2008   if (N != M) {
2009     n      = N/size + ((N % size) > rank);
2010     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2011     cstart = cend - n;
2012   } else {
2013     cstart = rstart;
2014     cend   = rend;
2015     n      = cend - cstart;
2016   }
2017 
2018   /* loop over local rows, determining number of off diagonal entries */
2019   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2020   jj = 0;
2021   for ( i=0; i<m; i++ ) {
2022     for ( j=0; j<ourlens[i]; j++ ) {
2023       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2024       jj++;
2025     }
2026   }
2027 
2028   /* create our matrix */
2029   for ( i=0; i<m; i++ ) {
2030     ourlens[i] -= offlens[i];
2031   }
2032   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2033   A = *newmat;
2034   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2035   for ( i=0; i<m; i++ ) {
2036     ourlens[i] += offlens[i];
2037   }
2038 
2039   if (!rank) {
2040     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2041 
2042     /* read in my part of the matrix numerical values  */
2043     nz   = procsnz[0];
2044     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2045 
2046     /* insert into matrix */
2047     jj      = rstart;
2048     smycols = mycols;
2049     svals   = vals;
2050     for ( i=0; i<m; i++ ) {
2051       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2052       smycols += ourlens[i];
2053       svals   += ourlens[i];
2054       jj++;
2055     }
2056 
2057     /* read in other processors and ship out */
2058     for ( i=1; i<size; i++ ) {
2059       nz   = procsnz[i];
2060       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2061       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2062     }
2063     PetscFree(procsnz);
2064   } else {
2065     /* receive numeric values */
2066     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2067 
2068     /* receive message of values */
2069     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2070     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2071     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2072 
2073     /* insert into matrix */
2074     jj      = rstart;
2075     smycols = mycols;
2076     svals   = vals;
2077     for ( i=0; i<m; i++ ) {
2078       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2079       smycols += ourlens[i];
2080       svals   += ourlens[i];
2081       jj++;
2082     }
2083   }
2084   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2085 
2086   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNC__
2092 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2093 /*
2094     Not great since it makes two copies of the submatrix: first a SeqAIJ
2095   one locally, and then the end result by concatenating the local matrices.
2096   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2097 */
2098 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2099 {
2100   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2101   Mat        *local,M, Mreuse;
2102   Scalar     *vwork,*aa;
2103   MPI_Comm   comm = mat->comm;
2104   Mat_SeqAIJ *aij;
2105   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2106 
2107   PetscFunctionBegin;
2108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110 
2111   if (call ==  MAT_REUSE_MATRIX) {
2112     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2113     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2114     local = &Mreuse;
2115     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2116   } else {
2117     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2118     Mreuse = *local;
2119     PetscFree(local);
2120   }
2121 
2122   /*
2123       m - number of local rows
2124       n - number of columns (same on all processors)
2125       rstart - first row in new global matrix generated
2126   */
2127   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2128   if (call == MAT_INITIAL_MATRIX) {
2129     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2130     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2131     ii  = aij->i;
2132     jj  = aij->j;
2133 
2134     /*
2135         Determine the number of non-zeros in the diagonal and off-diagonal
2136         portions of the matrix in order to do correct preallocation
2137     */
2138 
2139     /* first get start and end of "diagonal" columns */
2140     if (csize == PETSC_DECIDE) {
2141       nlocal = n/size + ((n % size) > rank);
2142     } else {
2143       nlocal = csize;
2144     }
2145     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2146     rstart = rend - nlocal;
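    /* MPI_Scan computes the inclusive prefix sum of nlocal, so rend is one
       past this process' last "diagonal" column and rstart is its first */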
2147     if (rank == size - 1 && rend != n) {
2148       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2149     }
2150 
2151     /* next, compute all the lengths */
2152     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2153     olens = dlens + m;
2154     for ( i=0; i<m; i++ ) {
2155       jend = ii[i+1] - ii[i];
2156       olen = 0;
2157       dlen = 0;
2158       for ( j=0; j<jend; j++ ) {
2159         if ( *jj < rstart || *jj >= rend) olen++;
2160         else dlen++;
2161         jj++;
2162       }
2163       olens[i] = olen;
2164       dlens[i] = dlen;
2165     }
2166     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2167     PetscFree(dlens);
2168   } else {
2169     int ml,nl;
2170 
2171     M = *newmat;
2172     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2173     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2174     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2175     /*
2176          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2177        rather than the slower MatSetValues().
2178     */
2179     M->was_assembled = PETSC_TRUE;
2180     M->assembled     = PETSC_FALSE;
2181   }
2182   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2183   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2184   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2185   ii  = aij->i;
2186   jj  = aij->j;
2187   aa  = aij->a;
2188   for (i=0; i<m; i++) {
2189     row   = rstart + i;
2190     nz    = ii[i+1] - ii[i];
2191     cwork = jj;     jj += nz;
2192     vwork = aa;     aa += nz;
2193     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2194   }
2195 
2196   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2197   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   *newmat = M;
2199 
2200   /* save submatrix used in processor for next request */
2201   if (call ==  MAT_INITIAL_MATRIX) {
2202     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2203     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2204   }
2205 
2206   PetscFunctionReturn(0);
2207 }
2208
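/*
   Illustrative caller sketch (hypothetical index sets isrow,iscol): extract
   the same parallel submatrix repeatedly, amortizing the symbolic setup on
   later calls through the "SubMatrix" object composed above.

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ... change numerical values in mat, keeping the same nonzero pattern ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/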