xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e1311b9049e89cb3452dcd306fde571f4b440ff2)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.233 1998/03/16 18:55:15 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global number of columns.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
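/*
   Illustrative sketch (not part of the original source): once colmap has been
   built by the routine above, a global column number "gcol" (a hypothetical
   variable) can be translated to a local column index in the off-diagonal
   block B.  A stored value of 0 means the column is not present locally,
   hence the -1.

      int lcol = aij->colmap[gcol] - 1;
      if (lcol < 0) {
        ... gcol is not a column of the local off-diagonal part ...
      }
*/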
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col) high = t; \
46       else             low  = t; \
47     } \
48       for ( _i=0; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col) high = t; \
117       else             low  = t; \
118     } \
119       for ( _i=0; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some Variables required in the macro */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           }
238           else col = in[j];
239           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
240           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
241           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
242         }
243       }
244     }
245     else {
246       if (roworiented && !aij->donotstash) {
247         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
248       }
249       else {
250         if (!aij->donotstash) {
251           row = im[i];
252           for ( j=0; j<n; j++ ) {
253             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
254           }
255         }
256       }
257     }
258   }
259   PetscFunctionReturn(0);
260 }
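/*
   Hedged usage sketch (assumptions noted): a caller typically sets the entries
   it owns (plus, optionally, off-processor entries, which are stashed above)
   and then assembles.  The names m, idxm, n, idxn, and vals below are
   hypothetical.

      ierr = MatSetValues(mat,m,idxm,n,idxn,vals,ADD_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   Columns with cstart <= in[j] < cend go into the diagonal block aij->A;
   all other locally owned contributions go into the off-diagonal block aij->B.
*/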
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatGetValues_MPIAIJ"
264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
265 {
266   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
267   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
268   int        cstart = aij->cstart, cend = aij->cend,row,col;
269 
270   PetscFunctionBegin;
271   for ( i=0; i<m; i++ ) {
272     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
273     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
274     if (idxm[i] >= rstart && idxm[i] < rend) {
275       row = idxm[i] - rstart;
276       for ( j=0; j<n; j++ ) {
277         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
278         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
279         if (idxn[j] >= cstart && idxn[j] < cend){
280           col = idxn[j] - cstart;
281           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
282         }
283         else {
284           if (!aij->colmap) {
285             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
286           }
287           col = aij->colmap[idxn[j]] - 1;
288           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
289           else {
290             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
291           }
292         }
293       }
294     } else {
295       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
296     }
297   }
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   PetscFunctionBegin;
315   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
316   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
317   if (addv == (ADD_VALUES|INSERT_VALUES)) {
318     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
319   }
320   mat->insertmode = addv; /* in case this processor had no cache */
321 
322   /*  first count number of contributors to each processor */
323   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
324   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
325   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
326   for ( i=0; i<aij->stash.n; i++ ) {
327     idx = aij->stash.idx[i];
328     for ( j=0; j<size; j++ ) {
329       if (idx >= owners[j] && idx < owners[j+1]) {
330         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
331       }
332     }
333   }
334   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
335 
336   /* inform other processors of number of messages and max length*/
337   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
338   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
339   nreceives = work[rank];
340   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
341   nmax = work[rank];
342   PetscFree(work);
343 
344   /* post receives:
345        1) each message will consist of ordered triples
346      (global row index, global column index, value); we store the indices
347      as Scalars to simplify the message passing.
348        2) since we don't know how long each individual message is we
349      allocate the largest needed buffer for each receive. Potentially
350      this is a lot of wasted space.
351 
352 
353        This could be done better.
354   */
355   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
357   for ( i=0; i<nreceives; i++ ) {
358     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
359               comm,recv_waits+i);CHKERRQ(ierr);
360   }
361 
362   /* do sends:
363       1) starts[i] gives the starting index in svalues for stuff going to
364          the ith processor
365   */
366   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
367   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
368   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
369   starts[0]  = 0;
370   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
371   for ( i=0; i<aij->stash.n; i++ ) {
372     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
373     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
374     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
375   }
376   PetscFree(owner);
377   starts[0] = 0;
378   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   count = 0;
380   for ( i=0; i<size; i++ ) {
381     if (procs[i]) {
382       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
383     }
384   }
385   PetscFree(starts); PetscFree(nprocs);
386 
387   /* Free cache space */
388   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
389   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
390 
391   aij->svalues    = svalues;    aij->rvalues    = rvalues;
392   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
393   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
394   aij->rmax       = nmax;
395 
396   PetscFunctionReturn(0);
397 }
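/*
   Sketch of the message layout used between MatAssemblyBegin_MPIAIJ and
   MatAssemblyEnd_MPIAIJ (a restatement of the packing loop above, not new
   functionality): each stashed entry occupies three consecutive Scalars in
   svalues,

      svalues[3*k]   = (Scalar) global row index
      svalues[3*k+1] = (Scalar) global column index
      svalues[3*k+2] = the value itself

   so the receiving side recovers n = count/3 triples per message and converts
   the first two entries of each triple back to ints.
*/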
398 extern int MatSetUpMultiply_MPIAIJ(Mat);
399 
400 #undef __FUNC__
401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
403 {
404   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
405   MPI_Status  *send_status,recv_status;
406   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
407   int         row,col,other_disassembled;
408   Scalar      *values,val;
409   InsertMode  addv = mat->insertmode;
410 
411   PetscFunctionBegin;
412   /*  wait on receives */
413   while (count) {
414     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
415     /* unpack receives into our local space */
416     values = aij->rvalues + 3*imdex*aij->rmax;
417     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
418     n = n/3;
419     for ( i=0; i<n; i++ ) {
420       row = (int) PetscReal(values[3*i]) - aij->rstart;
421       col = (int) PetscReal(values[3*i+1]);
422       val = values[3*i+2];
423       if (col >= aij->cstart && col < aij->cend) {
424         col -= aij->cstart;
425         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
426       } else {
427         if (mat->was_assembled || mat->assembled) {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
430           }
431           col = aij->colmap[col] - 1;
432           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
433             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
434             col = (int) PetscReal(values[3*i+1]);
435           }
436         }
437         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
438       }
439     }
440     count--;
441   }
442   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
443 
444   /* wait on sends */
445   if (aij->nsends) {
446     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
447     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
448     PetscFree(send_status);
449   }
450   PetscFree(aij->send_waits); PetscFree(aij->svalues);
451 
452   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
454 
455   /* determine if any processor has disassembled, if so we must
456      also disassemble ourselves, in order that we may reassemble. */
457   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
458   if (mat->was_assembled && !other_disassembled) {
459     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460   }
461 
462   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
463     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
464   }
465   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
467 
468   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatZeroEntries_MPIAIJ"
474 int MatZeroEntries_MPIAIJ(Mat A)
475 {
476   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478 
479   PetscFunctionBegin;
480   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
481   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNC__
486 #define __FUNC__ "MatZeroRows_MPIAIJ"
487 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
488 {
489   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
490   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
491   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
492   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
493   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
494   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
495   MPI_Comm       comm = A->comm;
496   MPI_Request    *send_waits,*recv_waits;
497   MPI_Status     recv_status,*send_status;
498   IS             istmp;
499 
500   PetscFunctionBegin;
501   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
502   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
503 
504   /*  first count number of contributors to each processor */
505   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
506   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
507   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
508   for ( i=0; i<N; i++ ) {
509     idx = rows[i];
510     found = 0;
511     for ( j=0; j<size; j++ ) {
512       if (idx >= owners[j] && idx < owners[j+1]) {
513         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
514       }
515     }
516     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
517   }
518   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
519 
520   /* inform other processors of number of messages and max length*/
521   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
522   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
523   nrecvs = work[rank];
524   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
525   nmax = work[rank];
526   PetscFree(work);
527 
528   /* post receives:   */
529   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
530   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
531   for ( i=0; i<nrecvs; i++ ) {
532     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
533   }
534 
535   /* do sends:
536       1) starts[i] gives the starting index in svalues for stuff going to
537          the ith processor
538   */
539   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
540   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
541   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
542   starts[0] = 0;
543   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
544   for ( i=0; i<N; i++ ) {
545     svalues[starts[owner[i]]++] = rows[i];
546   }
547   ISRestoreIndices(is,&rows);
548 
549   starts[0] = 0;
550   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
551   count = 0;
552   for ( i=0; i<size; i++ ) {
553     if (procs[i]) {
554       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
555     }
556   }
557   PetscFree(starts);
558 
559   base = owners[rank];
560 
561   /*  wait on receives */
562   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
563   source = lens + nrecvs;
564   count  = nrecvs; slen = 0;
565   while (count) {
566     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
567     /* unpack receives into our local space */
568     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
569     source[imdex]  = recv_status.MPI_SOURCE;
570     lens[imdex]  = n;
571     slen += n;
572     count--;
573   }
574   PetscFree(recv_waits);
575 
576   /* move the data into the send scatter */
577   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
578   count = 0;
579   for ( i=0; i<nrecvs; i++ ) {
580     values = rvalues + i*nmax;
581     for ( j=0; j<lens[i]; j++ ) {
582       lrows[count++] = values[j] - base;
583     }
584   }
585   PetscFree(rvalues); PetscFree(lens);
586   PetscFree(owner); PetscFree(nprocs);
587 
588   /* actually zap the local rows */
589   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
590   PLogObjectParent(A,istmp);
591 
592   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
593   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
594   ierr = ISDestroy(istmp); CHKERRQ(ierr);
595 
596   if (diag) {
597     for ( i = 0; i < slen; i++ ) {
598       row = lrows[i] + rstart;
599       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
600     }
601     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
602     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
603   }
604   PetscFree(lrows);
605  /* wait on sends */
606   if (nsends) {
607     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
608     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
609     PetscFree(send_status);
610   }
611   PetscFree(send_waits); PetscFree(svalues);
612 
613   PetscFunctionReturn(0);
614 }
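/*
   Hedged usage sketch: rows to be zeroed are listed (by global number) in an
   index set; the diagonal entry may optionally be set to a given value.  The
   names nrows, rows, and one below are hypothetical.

      Scalar one = 1.0;
      IS     is;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&is); CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
      ierr = ISDestroy(is); CHKERRQ(ierr);
*/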
615 
616 #undef __FUNC__
617 #define __FUNC__ "MatMult_MPIAIJ"
618 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
619 {
620   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
621   int        ierr,nt;
622 
623   PetscFunctionBegin;
624   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
625   if (nt != a->n) {
626     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
627   }
628   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
629   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
630   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
631   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
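/*
   What the routine above computes, written out (a restatement, not new code):
   with xx partitioned like the columns of A,

      yy = a->A * (local part of xx)  +  a->B * a->lvec,

   where lvec holds the needed off-processor entries of xx gathered by the
   scatter; the scatter is started before the local multiply so that
   communication overlaps with computation.
*/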
634 
635 #undef __FUNC__
636 #define __FUNC__ "MatMultAdd_MPIAIJ"
637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
638 {
639   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
640   int        ierr;
641 
642   PetscFunctionBegin;
643   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
644   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
645   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
646   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNC__
651 #define __FUNC__ "MatMultTrans_MPIAIJ"
652 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
653 {
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655   int        ierr;
656 
657   PetscFunctionBegin;
658   /* do nondiagonal part */
659   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
660   /* send it on its way */
661   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
662   /* do local part */
663   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
664   /* receive remote parts: note this assumes the values are not actually */
665   /* inserted in yy until the next line, which is true for my implementation*/
666   /* but is not perhaps always true. */
667   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
673 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
674 {
675   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676   int        ierr;
677 
678   PetscFunctionBegin;
679   /* do nondiagonal part */
680   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
681   /* send it on its way */
682   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
683   /* do local part */
684   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
685   /* receive remote parts: note this assumes the values are not actually */
686   /* inserted in zz until the next line, which is true for my implementation */
687   /* but is perhaps not always true. */
688   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 /*
693   This only works correctly for square matrices where the subblock A->A is the
694    diagonal block
695 */
696 #undef __FUNC__
697 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
698 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
699 {
700   int        ierr;
701   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
702 
703   PetscFunctionBegin;
704   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
705   if (a->rstart != a->cstart || a->rend != a->cend) {
706     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
707   }
708   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatScale_MPIAIJ"
714 int MatScale_MPIAIJ(Scalar *aa,Mat A)
715 {
716   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
717   int        ierr;
718 
719   PetscFunctionBegin;
720   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
721   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNC__
726 #define __FUNC__ "MatDestroy_MPIAIJ"
727 int MatDestroy_MPIAIJ(Mat mat)
728 {
729   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
730   int        ierr;
731 
732   PetscFunctionBegin;
733 #if defined(USE_PETSC_LOG)
734   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
735 #endif
736   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
737   PetscFree(aij->rowners);
738   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
739   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
740   if (aij->colmap) PetscFree(aij->colmap);
741   if (aij->garray) PetscFree(aij->garray);
742   if (aij->lvec)   VecDestroy(aij->lvec);
743   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
744   if (aij->rowvalues) PetscFree(aij->rowvalues);
745   PetscFree(aij);
746   PLogObjectDestroy(mat);
747   PetscHeaderDestroy(mat);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNC__
752 #define __FUNC__ "MatView_MPIAIJ_Binary"
753 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
754 {
755   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
756   int         ierr;
757 
758   PetscFunctionBegin;
759   if (aij->size == 1) {
760     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
761   }
762   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
768 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
769 {
770   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
771   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
772   int         ierr, format,shift = C->indexshift,rank;
773   FILE        *fd;
774   ViewerType  vtype;
775 
776   PetscFunctionBegin;
777   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
778   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
779     ierr = ViewerGetFormat(viewer,&format);
780     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
781       MatInfo info;
782       int     flg;
783       MPI_Comm_rank(mat->comm,&rank);
784       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
785       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
786       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
787       PetscSequentialPhaseBegin(mat->comm,1);
788       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
789          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
790       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
791          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
792       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
793       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
794       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
795       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
796       fflush(fd);
797       PetscSequentialPhaseEnd(mat->comm,1);
798       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
799       PetscFunctionReturn(0);
800     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
801       PetscFunctionReturn(0);
802     }
803   }
804 
805   if (vtype == DRAW_VIEWER) {
806     Draw       draw;
807     PetscTruth isnull;
808     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
809     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
810   }
811 
812   if (vtype == ASCII_FILE_VIEWER) {
813     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
814     PetscSequentialPhaseBegin(mat->comm,1);
815     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
816            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
817            aij->cend);
818     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
819     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
820     fflush(fd);
821     PetscSequentialPhaseEnd(mat->comm,1);
822   } else {
823     int size = aij->size;
824     rank = aij->rank;
825     if (size == 1) {
826       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
827     } else {
828       /* assemble the entire matrix onto first processor. */
829       Mat         A;
830       Mat_SeqAIJ *Aloc;
831       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
832       Scalar      *a;
833 
834       if (!rank) {
835         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
836       } else {
837         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
838       }
839       PLogObjectParent(mat,A);
840 
841       /* copy over the A part */
842       Aloc = (Mat_SeqAIJ*) aij->A->data;
843       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
844       row = aij->rstart;
845       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
846       for ( i=0; i<m; i++ ) {
847         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
848         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
849       }
850       aj = Aloc->j;
851       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
852 
853       /* copy over the B part */
854       Aloc = (Mat_SeqAIJ*) aij->B->data;
855       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
856       row = aij->rstart;
857       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
858       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
859       for ( i=0; i<m; i++ ) {
860         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
861         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
862       }
863       PetscFree(ct);
864       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
865       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
866       /*
867          Everyone has to call MatView() to draw the matrix since the graphics waits are
868          synchronized across all processors that share the Draw object
869       */
870       if (!rank || vtype == DRAW_VIEWER) {
871         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
872       }
873       ierr = MatDestroy(A); CHKERRQ(ierr);
874     }
875   }
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNC__
880 #define __FUNC__ "MatView_MPIAIJ"
881 int MatView_MPIAIJ(Mat mat,Viewer viewer)
882 {
883   int         ierr;
884   ViewerType  vtype;
885 
886   PetscFunctionBegin;
887   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
888   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
889       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
890     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
891   }
892   else if (vtype == BINARY_FILE_VIEWER) {
893     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
894   }
895   PetscFunctionReturn(0);
896 }
897 
898 /*
899     This has to provide several versions.
900 
901      2) a) use only local smoothing, updating outer values only once.
902         b) local smoothing, updating outer values each inner iteration
903      3) coloring, updating outer values between colors.
904 */
905 #undef __FUNC__
906 #define __FUNC__ "MatRelax_MPIAIJ"
907 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
908                            double fshift,int its,Vec xx)
909 {
910   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
911   Mat        AA = mat->A, BB = mat->B;
912   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
913   Scalar     *b,*x,*xs,*ls,d,*v,sum;
914   int        ierr,*idx, *diag;
915   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
916 
917   PetscFunctionBegin;
918   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
919   xs = x + shift; /* shift by one for index start of 1 */
920   ls = ls + shift;
921   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
922   diag = A->diag;
923   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
924     if (flag & SOR_ZERO_INITIAL_GUESS) {
925       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
926       PetscFunctionReturn(0);
927     }
928     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
929     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
930     while (its--) {
931       /* go down through the rows */
932       for ( i=0; i<m; i++ ) {
933         n    = A->i[i+1] - A->i[i];
934         idx  = A->j + A->i[i] + shift;
935         v    = A->a + A->i[i] + shift;
936         sum  = b[i];
937         SPARSEDENSEMDOT(sum,xs,v,idx,n);
938         d    = fshift + A->a[diag[i]+shift];
939         n    = B->i[i+1] - B->i[i];
940         idx  = B->j + B->i[i] + shift;
941         v    = B->a + B->i[i] + shift;
942         SPARSEDENSEMDOT(sum,ls,v,idx,n);
943         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
944       }
945       /* come up through the rows */
946       for ( i=m-1; i>-1; i-- ) {
947         n    = A->i[i+1] - A->i[i];
948         idx  = A->j + A->i[i] + shift;
949         v    = A->a + A->i[i] + shift;
950         sum  = b[i];
951         SPARSEDENSEMDOT(sum,xs,v,idx,n);
952         d    = fshift + A->a[diag[i]+shift];
953         n    = B->i[i+1] - B->i[i];
954         idx  = B->j + B->i[i] + shift;
955         v    = B->a + B->i[i] + shift;
956         SPARSEDENSEMDOT(sum,ls,v,idx,n);
957         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
958       }
959     }
960   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
961     if (flag & SOR_ZERO_INITIAL_GUESS) {
962       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
963       PetscFunctionReturn(0);
964     }
965     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
966     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
967     while (its--) {
968       for ( i=0; i<m; i++ ) {
969         n    = A->i[i+1] - A->i[i];
970         idx  = A->j + A->i[i] + shift;
971         v    = A->a + A->i[i] + shift;
972         sum  = b[i];
973         SPARSEDENSEMDOT(sum,xs,v,idx,n);
974         d    = fshift + A->a[diag[i]+shift];
975         n    = B->i[i+1] - B->i[i];
976         idx  = B->j + B->i[i] + shift;
977         v    = B->a + B->i[i] + shift;
978         SPARSEDENSEMDOT(sum,ls,v,idx,n);
979         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
980       }
981     }
982   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
983     if (flag & SOR_ZERO_INITIAL_GUESS) {
984       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     }
987     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
988                             mat->Mvctx); CHKERRQ(ierr);
989     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
990                             mat->Mvctx); CHKERRQ(ierr);
991     while (its--) {
992       for ( i=m-1; i>-1; i-- ) {
993         n    = A->i[i+1] - A->i[i];
994         idx  = A->j + A->i[i] + shift;
995         v    = A->a + A->i[i] + shift;
996         sum  = b[i];
997         SPARSEDENSEMDOT(sum,xs,v,idx,n);
998         d    = fshift + A->a[diag[i]+shift];
999         n    = B->i[i+1] - B->i[i];
1000         idx  = B->j + B->i[i] + shift;
1001         v    = B->a + B->i[i] + shift;
1002         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1003         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1004       }
1005     }
1006   } else {
1007     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1008   }
1009   PetscFunctionReturn(0);
1010 }
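/*
   The local relaxation step applied above can be written, for each owned row i
   (assuming SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts the sparse dot product
   v . x(idx) from sum, as in src/inline/spops.h):

      sum = b_i - sum_j A_ij x_j - sum_k B_ik lvec_k
      x_i = (1 - omega) x_i + omega (sum + A_ii x_i) / (fshift + A_ii)

   i.e. a standard SOR update in which the off-processor contribution comes
   from lvec and is held fixed for the whole sweep.
*/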
1011 
1012 #undef __FUNC__
1013 #define __FUNC__ "MatGetInfo_MPIAIJ"
1014 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1015 {
1016   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1017   Mat        A = mat->A, B = mat->B;
1018   int        ierr;
1019   double     isend[5], irecv[5];
1020 
1021   PetscFunctionBegin;
1022   info->block_size     = 1.0;
1023   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1024   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1025   isend[3] = info->memory;  isend[4] = info->mallocs;
1026   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1027   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1028   isend[3] += info->memory;  isend[4] += info->mallocs;
1029   if (flag == MAT_LOCAL) {
1030     info->nz_used      = isend[0];
1031     info->nz_allocated = isend[1];
1032     info->nz_unneeded  = isend[2];
1033     info->memory       = isend[3];
1034     info->mallocs      = isend[4];
1035   } else if (flag == MAT_GLOBAL_MAX) {
1036     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1037     info->nz_used      = irecv[0];
1038     info->nz_allocated = irecv[1];
1039     info->nz_unneeded  = irecv[2];
1040     info->memory       = irecv[3];
1041     info->mallocs      = irecv[4];
1042   } else if (flag == MAT_GLOBAL_SUM) {
1043     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1044     info->nz_used      = irecv[0];
1045     info->nz_allocated = irecv[1];
1046     info->nz_unneeded  = irecv[2];
1047     info->memory       = irecv[3];
1048     info->mallocs      = irecv[4];
1049   }
1050   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1051   info->fill_ratio_needed = 0;
1052   info->factor_mallocs    = 0;
1053   info->rows_global       = (double)mat->M;
1054   info->columns_global    = (double)mat->N;
1055   info->rows_local        = (double)mat->m;
1056   info->columns_local     = (double)mat->n;
1057 
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNC__
1062 #define __FUNC__ "MatSetOption_MPIAIJ"
1063 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1064 {
1065   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1066 
1067   PetscFunctionBegin;
1068   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1069       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1070       op == MAT_COLUMNS_UNSORTED ||
1071       op == MAT_COLUMNS_SORTED ||
1072       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1073       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1074         MatSetOption(a->A,op);
1075         MatSetOption(a->B,op);
1076   } else if (op == MAT_ROW_ORIENTED) {
1077     a->roworiented = 1;
1078     MatSetOption(a->A,op);
1079     MatSetOption(a->B,op);
1080   } else if (op == MAT_ROWS_SORTED ||
1081              op == MAT_ROWS_UNSORTED ||
1082              op == MAT_SYMMETRIC ||
1083              op == MAT_STRUCTURALLY_SYMMETRIC ||
1084              op == MAT_YES_NEW_DIAGONALS)
1085     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1086   else if (op == MAT_COLUMN_ORIENTED) {
1087     a->roworiented = 0;
1088     MatSetOption(a->A,op);
1089     MatSetOption(a->B,op);
1090   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1091     a->donotstash = 1;
1092   } else if (op == MAT_NO_NEW_DIAGONALS){
1093     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1094   } else {
1095     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1096   }
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNC__
1101 #define __FUNC__ "MatGetSize_MPIAIJ"
1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1103 {
1104   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1105 
1106   PetscFunctionBegin;
1107   if (m) *m = mat->M;
1108   if (n) *n = mat->N;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNC__
1113 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1114 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1115 {
1116   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1117 
1118   PetscFunctionBegin;
1119   if (m) *m = mat->m;
1120   if (n) *n = mat->n;
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNC__
1125 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1127 {
1128   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1129 
1130   PetscFunctionBegin;
1131   *m = mat->rstart; *n = mat->rend;
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1136 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1137 
1138 #undef __FUNC__
1139 #define __FUNC__ "MatGetRow_MPIAIJ"
1140 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1141 {
1142   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1143   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1144   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1145   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1146   int        *cmap, *idx_p;
1147 
1148   PetscFunctionBegin;
1149   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1150   mat->getrowactive = PETSC_TRUE;
1151 
1152   if (!mat->rowvalues && (idx || v)) {
1153     /*
1154         allocate enough space to hold information from the longest row.
1155     */
1156     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1157     int     max = 1,m = mat->m,tmp;
1158     for ( i=0; i<m; i++ ) {
1159       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1160       if (max < tmp) { max = tmp; }
1161     }
1162     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1163     CHKPTRQ(mat->rowvalues);
1164     mat->rowindices = (int *) (mat->rowvalues + max);
1165   }
1166 
1167   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1168   lrow = row - rstart;
1169 
1170   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1171   if (!v)   {pvA = 0; pvB = 0;}
1172   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1173   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1174   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1175   nztot = nzA + nzB;
1176 
1177   cmap  = mat->garray;
1178   if (v  || idx) {
1179     if (nztot) {
1180       /* Sort by increasing column numbers, assuming A and B already sorted */
1181       int imark = -1;
1182       if (v) {
1183         *v = v_p = mat->rowvalues;
1184         for ( i=0; i<nzB; i++ ) {
1185           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1186           else break;
1187         }
1188         imark = i;
1189         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1190         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1191       }
1192       if (idx) {
1193         *idx = idx_p = mat->rowindices;
1194         if (imark > -1) {
1195           for ( i=0; i<imark; i++ ) {
1196             idx_p[i] = cmap[cworkB[i]];
1197           }
1198         } else {
1199           for ( i=0; i<nzB; i++ ) {
1200             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1201             else break;
1202           }
1203           imark = i;
1204         }
1205         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1206         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1207       }
1208     }
1209     else {
1210       if (idx) *idx = 0;
1211       if (v)   *v   = 0;
1212     }
1213   }
1214   *nz = nztot;
1215   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1216   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
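/*
   Hedged usage sketch: only locally owned rows may be requested, so a caller
   usually loops over its ownership range.  The names rstart, rend, row, nz,
   cols, and vals below are hypothetical.

      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ... use the nz global column indices and values of this row ...
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
      }
*/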
1219 
1220 #undef __FUNC__
1221 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1222 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1223 {
1224   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1225 
1226   PetscFunctionBegin;
1227   if (aij->getrowactive == PETSC_FALSE) {
1228     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1229   }
1230   aij->getrowactive = PETSC_FALSE;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNC__
1235 #define __FUNC__ "MatNorm_MPIAIJ"
1236 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1237 {
1238   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1239   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1240   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1241   double     sum = 0.0;
1242   Scalar     *v;
1243 
1244   PetscFunctionBegin;
1245   if (aij->size == 1) {
1246     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1247   } else {
1248     if (type == NORM_FROBENIUS) {
1249       v = amat->a;
1250       for (i=0; i<amat->nz; i++ ) {
1251 #if defined(USE_PETSC_COMPLEX)
1252         sum += real(conj(*v)*(*v)); v++;
1253 #else
1254         sum += (*v)*(*v); v++;
1255 #endif
1256       }
1257       v = bmat->a;
1258       for (i=0; i<bmat->nz; i++ ) {
1259 #if defined(USE_PETSC_COMPLEX)
1260         sum += real(conj(*v)*(*v)); v++;
1261 #else
1262         sum += (*v)*(*v); v++;
1263 #endif
1264       }
1265       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1266       *norm = sqrt(*norm);
1267     } else if (type == NORM_1) { /* max column norm */
1268       double *tmp, *tmp2;
1269       int    *jj, *garray = aij->garray;
1270       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1271       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1272       PetscMemzero(tmp,aij->N*sizeof(double));
1273       *norm = 0.0;
1274       v = amat->a; jj = amat->j;
1275       for ( j=0; j<amat->nz; j++ ) {
1276         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1277       }
1278       v = bmat->a; jj = bmat->j;
1279       for ( j=0; j<bmat->nz; j++ ) {
1280         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1281       }
1282       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1283       for ( j=0; j<aij->N; j++ ) {
1284         if (tmp2[j] > *norm) *norm = tmp2[j];
1285       }
1286       PetscFree(tmp); PetscFree(tmp2);
1287     } else if (type == NORM_INFINITY) { /* max row norm */
1288       double ntemp = 0.0;
1289       for ( j=0; j<amat->m; j++ ) {
1290         v = amat->a + amat->i[j] + shift;
1291         sum = 0.0;
1292         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1293           sum += PetscAbsScalar(*v); v++;
1294         }
1295         v = bmat->a + bmat->i[j] + shift;
1296         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1297           sum += PetscAbsScalar(*v); v++;
1298         }
1299         if (sum > ntemp) ntemp = sum;
1300       }
1301       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1302     } else {
1303       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1304     }
1305   }
1306   PetscFunctionReturn(0);
1307 }
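/*
   For reference, the norms computed above (restated, no new behavior):
     NORM_FROBENIUS : sqrt of the global sum of |a_ij|^2 (local sums reduced with MPI_SUM)
     NORM_1         : max over global columns j of sum_i |a_ij| (column sums reduced with MPI_SUM)
     NORM_INFINITY  : max over rows i of sum_j |a_ij| (local maxima reduced with MPI_MAX)
   The two norm is not available for this matrix type.
*/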
1308 
1309 #undef __FUNC__
1310 #define __FUNC__ "MatTranspose_MPIAIJ"
1311 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1312 {
1313   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1314   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1315   int        ierr,shift = Aloc->indexshift;
1316   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1317   Mat        B;
1318   Scalar     *array;
1319 
1320   PetscFunctionBegin;
1321   if (matout == PETSC_NULL && M != N) {
1322     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1323   }
1324 
1325   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1326 
1327   /* copy over the A part */
1328   Aloc = (Mat_SeqAIJ*) a->A->data;
1329   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1330   row = a->rstart;
1331   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1332   for ( i=0; i<m; i++ ) {
1333     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1334     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1335   }
1336   aj = Aloc->j;
1337   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1338 
1339   /* copy over the B part */
1340   Aloc = (Mat_SeqAIJ*) a->B->data;
1341   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1342   row = a->rstart;
1343   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1344   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1345   for ( i=0; i<m; i++ ) {
1346     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1347     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1348   }
1349   PetscFree(ct);
1350   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1351   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1352   if (matout != PETSC_NULL) {
1353     *matout = B;
1354   } else {
1355     PetscOps       *Abops;
1356     struct _MatOps *Aops;
1357 
1358     /* This isn't really an in-place transpose, but we free the data structures from a */
1359     PetscFree(a->rowners);
1360     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1361     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1362     if (a->colmap) PetscFree(a->colmap);
1363     if (a->garray) PetscFree(a->garray);
1364     if (a->lvec) VecDestroy(a->lvec);
1365     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1366     PetscFree(a);
1367 
1368     /*
1369        This is horrible, horrible code. We need to keep the
1370       A pointers for the bops and ops but copy everything
1371       else from B.
1372     */
1373     Abops = A->bops;
1374     Aops  = A->ops;
1375     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1376     A->bops = Abops;
1377     A->ops  = Aops;
1378     PetscHeaderDestroy(B);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNC__
1384 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1385 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1386 {
1387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1388   Mat a = aij->A, b = aij->B;
1389   int ierr,s1,s2,s3;
1390 
1391   PetscFunctionBegin;
1392   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1393   if (rr) {
1394     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1395     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1396     /* Overlap communication with computation. */
1397     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1398   }
1399   if (ll) {
1400     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1401     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1402     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1403   }
1404   /* scale  the diagonal block */
1405   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1406 
1407   if (rr) {
1408     /* Do a scatter end and then right scale the off-diagonal block */
1409     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1410     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1411   }
1412 
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 
1417 extern int MatPrintHelp_SeqAIJ(Mat);
1418 #undef __FUNC__
1419 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1420 int MatPrintHelp_MPIAIJ(Mat A)
1421 {
1422   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1423   int        ierr;
1424 
1425   PetscFunctionBegin;
1426   if (!a->rank) {
1427     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNC__
1433 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1434 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1435 {
1436   PetscFunctionBegin;
1437   *bs = 1;
1438   PetscFunctionReturn(0);
1439 }
1440 #undef __FUNC__
1441 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1442 int MatSetUnfactored_MPIAIJ(Mat A)
1443 {
1444   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1445   int        ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNC__
1453 #define __FUNC__ "MatEqual_MPIAIJ"
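/*
   Two MPIAIJ matrices are considered equal only if both their local diagonal
   blocks and their local off-diagonal blocks match on every processor; the
   per-process results are combined with a logical AND over the communicator.
*/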
1454 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1455 {
1456   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1457   Mat        a, b, c, d;
1458   PetscTruth flg;
1459   int        ierr;
1460 
1461   PetscFunctionBegin;
1462   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1463   a = matA->A; b = matA->B;
1464   c = matB->A; d = matB->B;
1465 
1466   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1467   if (flg == PETSC_TRUE) {
1468     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1469   }
1470   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1475 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1476 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1477 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1478 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1479 
1480 /* -------------------------------------------------------------------*/
1481 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1482        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1483        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1484        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1485        0,0,
1486        0,0,
1487        0,0,
1488        MatRelax_MPIAIJ,
1489        MatTranspose_MPIAIJ,
1490        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1491        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1492        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1493        0,
1494        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1495        0,0,0,0,
1496        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1497        0,0,
1498        0,0,MatConvertSameType_MPIAIJ,0,0,
1499        0,0,0,
1500        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1501        MatPrintHelp_MPIAIJ,
1502        MatScale_MPIAIJ,0,0,0,
1503        MatGetBlockSize_MPIAIJ,0,0,0,0,
1504        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1505        0,0,MatGetSubMatrix_MPIAIJ};
1506 
1507 
1508 #undef __FUNC__
1509 #define __FUNC__ "MatCreateMPIAIJ"
1510 /*@C
1511    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1512    (the default parallel PETSc format).  For good matrix assembly performance
1513    the user should preallocate the matrix storage by setting the parameters
1514    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1515    performance can be increased by more than a factor of 50.
1516 
1517    Input Parameters:
1518 .  comm - MPI communicator
1519 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1520            This value should be the same as the local size used in creating the
1521            y vector for the matrix-vector product y = Ax.
1522 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1523        This value should be the same as the local size used in creating the
1524        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1525 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1526 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1527 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1528            (same for all local rows)
1529 .  d_nnz - array containing the number of nonzeros in the various rows of the
1530            diagonal portion of the local submatrix (possibly different for each row)
1531            or PETSC_NULL. You must leave room for the diagonal entry even if
1532            it is zero.
1533 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1534            submatrix (same for all local rows).
1535 .  o_nnz - array containing the number of nonzeros in the various rows of the
1536            off-diagonal portion of the local submatrix (possibly different for
1537            each row) or PETSC_NULL.
1538 
1539    Output Parameter:
1540 .  A - the matrix
1541 
1542    Notes:
1543    The AIJ format (also called the Yale sparse matrix format or
1544    compressed row storage) is fully compatible with standard Fortran 77
1545    storage.  That is, the stored row and column indices can begin at
1546    either one (as in Fortran) or zero.  See the users manual for details.
1547 
1548    The user MUST specify either the local or global matrix dimensions
1549    (possibly both).
1550 
1551    By default, this format uses inodes (identical nodes) when possible.
1552    We search for consecutive rows with the same nonzero structure, thereby
1553    reusing matrix information to achieve increased efficiency.
1554 
1555    Options Database Keys:
1556 $    -mat_aij_no_inode  - Do not use inodes
1557 $    -mat_aij_inode_limit <limit> - Set inode limit.
1558 $        (max limit=5)
1559 $    -mat_aij_oneindex - Internally use indexing starting at 1
1560 $        rather than 0.  Note: When calling MatSetValues(),
1561 $        the user still MUST index entries starting at 0!
1562 
1563    Storage Information:
1564    For a square global matrix we define each processor's diagonal portion
1565    to be its local rows and the corresponding columns (a square submatrix);
1566    each processor's off-diagonal portion encompasses the remainder of the
1567    local matrix (a rectangular submatrix).
1568 
1569    The user can specify preallocated storage for the diagonal part of
1570    the local submatrix with either d_nz or d_nnz (not both).  Set
1571    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1572    memory allocation.  Likewise, specify preallocated storage for the
1573    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1574 
1575    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1576    the figure below we depict these three local rows and all columns (0-11).
1577 
1578 $          0 1 2 3 4 5 6 7 8 9 10 11
1579 $         --------------------------
1580 $  row 3  |  o o o d d d o o o o o o
1581 $  row 4  |  o o o d d d o o o o o o
1582 $  row 5  |  o o o d d d o o o o o o
1583 $         --------------------------
1584 $
1585 
1586    Thus, any entries in the d locations are stored in the d (diagonal)
1587    submatrix, and any entries in the o locations are stored in the
1588    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1589    stored simply in the MATSEQAIJ format for compressed row storage.
1590 
1591    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1592    and o_nz should indicate the number of nonzeros per row in the o matrix.
1593    In general, for PDE problems in which most nonzeros are near the diagonal,
1594    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1595    or you will get TERRIBLE performance; see the users' manual chapter on
1596    matrices.
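
   Example:
   For the three local rows depicted above, assuming a 12x12 global matrix
   with 3 local columns owned by this processor, a preallocation sketch for
   the creation call is
$      ierr = MatCreateMPIAIJ(comm,3,3,12,12,3,PETSC_NULL,9,PETSC_NULL,&A);
   Each processor passes its own local sizes and nonzero counts; the exact
   values depend on the true nonzero pattern of the assembled matrix.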
1597 
1598 .keywords: matrix, aij, compressed row, sparse, parallel
1599 
1600 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1601 @*/
1602 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1603 {
1604   Mat          B;
1605   Mat_MPIAIJ   *b;
1606   int          ierr, i,sum[2],work[2],size;
1607 
1608   PetscFunctionBegin;
1609   MPI_Comm_size(comm,&size);
1610   if (size == 1) {
1611     if (M == PETSC_DECIDE) M = m;
1612     if (N == PETSC_DECIDE) N = n;
1613     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   *A = 0;
1618   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1619   PLogObjectCreate(B);
1620   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1621   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1622   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1623   B->ops->destroy    = MatDestroy_MPIAIJ;
1624   B->ops->view       = MatView_MPIAIJ;
1625   B->factor     = 0;
1626   B->assembled  = PETSC_FALSE;
1627   B->mapping    = 0;
1628 
1629   B->insertmode = NOT_SET_VALUES;
1630   b->size       = size;
1631   MPI_Comm_rank(comm,&b->rank);
1632 
1633   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1634     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1635   }
1636 
1637   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1638     work[0] = m; work[1] = n;
1639     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1640     if (M == PETSC_DECIDE) M = sum[0];
1641     if (N == PETSC_DECIDE) N = sum[1];
1642   }
1643   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1644   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1645   b->m = m; B->m = m;
1646   b->n = n; B->n = n;
1647   b->N = N; B->N = N;
1648   b->M = M; B->M = M;
1649 
1650   /* build local table of row and column ownerships */
1651   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1652   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1653   b->cowners = b->rowners + b->size + 2;
1654   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1655   b->rowners[0] = 0;
1656   for ( i=2; i<=b->size; i++ ) {
1657     b->rowners[i] += b->rowners[i-1];
1658   }
1659   b->rstart = b->rowners[b->rank];
1660   b->rend   = b->rowners[b->rank+1];
1661   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1662   b->cowners[0] = 0;
1663   for ( i=2; i<=b->size; i++ ) {
1664     b->cowners[i] += b->cowners[i-1];
1665   }
1666   b->cstart = b->cowners[b->rank];
1667   b->cend   = b->cowners[b->rank+1];
1668 
1669   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1670   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1671   PLogObjectParent(B,b->A);
1672   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1673   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1674   PLogObjectParent(B,b->B);
1675 
1676   /* build cache for off array entries formed */
1677   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1678   b->donotstash  = 0;
1679   b->colmap      = 0;
1680   b->garray      = 0;
1681   b->roworiented = 1;
1682 
1683   /* stuff used for matrix vector multiply */
1684   b->lvec      = 0;
1685   b->Mvctx     = 0;
1686 
1687   /* stuff for MatGetRow() */
1688   b->rowindices   = 0;
1689   b->rowvalues    = 0;
1690   b->getrowactive = PETSC_FALSE;
1691 
1692   *A = B;
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #undef __FUNC__
1697 #define __FUNC__ "MatConvertSameType_MPIAIJ"
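/*
   Duplicates an MPIAIJ matrix: the parallel layout and ownership tables, the
   column map and garray, the scatter context and ghosted work vector, and the
   local diagonal and off-diagonal blocks of matin are all copied into a
   freshly created matrix.
*/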
1698 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1699 {
1700   Mat        mat;
1701   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1702   int        ierr, len=0, flg;
1703 
1704   PetscFunctionBegin;
1705   *newmat       = 0;
1706   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1707   PLogObjectCreate(mat);
1708   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1709   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1710   mat->ops->destroy    = MatDestroy_MPIAIJ;
1711   mat->ops->view       = MatView_MPIAIJ;
1712   mat->factor     = matin->factor;
1713   mat->assembled  = PETSC_TRUE;
1714 
1715   a->m = mat->m   = oldmat->m;
1716   a->n = mat->n   = oldmat->n;
1717   a->M = mat->M   = oldmat->M;
1718   a->N = mat->N   = oldmat->N;
1719 
1720   a->rstart       = oldmat->rstart;
1721   a->rend         = oldmat->rend;
1722   a->cstart       = oldmat->cstart;
1723   a->cend         = oldmat->cend;
1724   a->size         = oldmat->size;
1725   a->rank         = oldmat->rank;
1726   mat->insertmode = NOT_SET_VALUES;
1727   a->rowvalues    = 0;
1728   a->getrowactive = PETSC_FALSE;
1729 
1730   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1731   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1732   a->cowners = a->rowners + a->size + 2;
1733   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1734   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1735   if (oldmat->colmap) {
1736     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1737     PLogObjectMemory(mat,(a->N)*sizeof(int));
1738     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1739   } else a->colmap = 0;
1740   if (oldmat->garray) {
1741     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1742     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1743     PLogObjectMemory(mat,len*sizeof(int));
1744     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1745   } else a->garray = 0;
1746 
1747   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1748   PLogObjectParent(mat,a->lvec);
1749   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1750   PLogObjectParent(mat,a->Mvctx);
1751   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1752   PLogObjectParent(mat,a->A);
1753   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1754   PLogObjectParent(mat,a->B);
1755   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1756   if (flg) {
1757     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1758   }
1759   *newmat = mat;
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #include "sys.h"
1764 
1765 #undef __FUNC__
1766 #define __FUNC__ "MatLoad_MPIAIJ"
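/*
   Usage sketch (illustrative only; ViewerFileOpenBinary() and BINARY_RDONLY
   are assumed to be this PETSc version's binary-viewer creation routine and
   mode, and may differ elsewhere):

      Viewer viewer;
      Mat    A;
      ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);

   MatLoad() with type MATMPIAIJ is expected to dispatch to the routine below.
*/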
1767 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1768 {
1769   Mat          A;
1770   Scalar       *vals,*svals;
1771   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1772   MPI_Status   status;
1773   int          i, nz, ierr, j,rstart, rend, fd;
1774   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1775   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1776   int          tag = ((PetscObject)viewer)->tag;
1777 
1778   PetscFunctionBegin;
1779   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1780   if (!rank) {
1781     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1782     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1783     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1784     if (header[3] < 0) {
1785       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1786     }
1787   }
1788 
1789 
1790   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1791   M = header[1]; N = header[2];
1792   /* determine ownership of all rows */
1793   m = M/size + ((M % size) > rank);
1794   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1795   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1796   rowners[0] = 0;
1797   for ( i=2; i<=size; i++ ) {
1798     rowners[i] += rowners[i-1];
1799   }
1800   rstart = rowners[rank];
1801   rend   = rowners[rank+1];
1802 
1803   /* distribute row lengths to all processors */
1804   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1805   offlens = ourlens + (rend-rstart);
1806   if (!rank) {
1807     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1808     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1809     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1810     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1811     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1812     PetscFree(sndcounts);
1813   } else {
1814     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1815   }
1816 
1817   if (!rank) {
1818     /* calculate the number of nonzeros on each processor */
1819     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1820     PetscMemzero(procsnz,size*sizeof(int));
1821     for ( i=0; i<size; i++ ) {
1822       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1823         procsnz[i] += rowlengths[j];
1824       }
1825     }
1826     PetscFree(rowlengths);
1827 
1828     /* determine max buffer needed and allocate it */
1829     maxnz = 0;
1830     for ( i=0; i<size; i++ ) {
1831       maxnz = PetscMax(maxnz,procsnz[i]);
1832     }
1833     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1834 
1835     /* read in my part of the matrix column indices  */
1836     nz     = procsnz[0];
1837     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1838     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1839 
1840     /* read in everyone else's part and ship it off */
1841     for ( i=1; i<size; i++ ) {
1842       nz   = procsnz[i];
1843       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1844       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1845     }
1846     PetscFree(cols);
1847   } else {
1848     /* determine buffer space needed for message */
1849     nz = 0;
1850     for ( i=0; i<m; i++ ) {
1851       nz += ourlens[i];
1852     }
1853     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1854 
1855     /* receive message of column indices */
1856     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1857     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1858     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1859   }
1860 
1861   /* loop over local rows, determining the number of off-diagonal entries */
1862   PetscMemzero(offlens,m*sizeof(int));
1863   jj = 0;
1864   for ( i=0; i<m; i++ ) {
1865     for ( j=0; j<ourlens[i]; j++ ) {
1866       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1867       jj++;
1868     }
1869   }
1870 
1871   /* create our matrix */
1872   for ( i=0; i<m; i++ ) {
1873     ourlens[i] -= offlens[i];
1874   }
1875   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1876   A = *newmat;
1877   MatSetOption(A,MAT_COLUMNS_SORTED);
1878   for ( i=0; i<m; i++ ) {
1879     ourlens[i] += offlens[i];
1880   }
1881 
1882   if (!rank) {
1883     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1884 
1885     /* read in my part of the matrix numerical values  */
1886     nz = procsnz[0];
1887     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1888 
1889     /* insert into matrix */
1890     jj      = rstart;
1891     smycols = mycols;
1892     svals   = vals;
1893     for ( i=0; i<m; i++ ) {
1894       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1895       smycols += ourlens[i];
1896       svals   += ourlens[i];
1897       jj++;
1898     }
1899 
1900     /* read in other processors and ship out */
1901     for ( i=1; i<size; i++ ) {
1902       nz   = procsnz[i];
1903       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1904       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1905     }
1906     PetscFree(procsnz);
1907   } else {
1908     /* receive numeric values */
1909     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1910 
1911     /* receive message of values */
1912     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1913     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1914     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1915 
1916     /* insert into matrix */
1917     jj      = rstart;
1918     smycols = mycols;
1919     svals   = vals;
1920     for ( i=0; i<m; i++ ) {
1921       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1922       smycols += ourlens[i];
1923       svals   += ourlens[i];
1924       jj++;
1925     }
1926   }
1927   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1928 
1929   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1930   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNC__
1935 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1936 /*
1937     Not great since it makes two copies of the submatrix: first a SeqAIJ
1938   in *local, and then the final result built by concatenating the local
1939   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1940 */
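/*
   Usage sketch (illustrative; nlocal, first, and ncols are hypothetical
   per-process sizes, and the public MatGetSubMatrix() is assumed to mirror
   the calling sequence of this routine):

      IS  isrow, iscol;
      Mat submat;
      ierr = ISCreateStride(PETSC_COMM_SELF,nlocal,first,1,&isrow); CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol); CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&submat); CHKERRQ(ierr);
*/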
1941 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1942 {
1943   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1944   Mat        *local,M;
1945   Scalar     *vwork,*aa;
1946   MPI_Comm   comm = mat->comm;
1947   Mat_SeqAIJ *aij;
1948   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1949 
1950   PetscFunctionBegin;
1951   MPI_Comm_rank(comm,&rank);
1952   MPI_Comm_size(comm,&size);
1953 
1954   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1955 
1956   /*
1957       m - number of local rows
1958       n - number of columns (same on all processors)
1959       rstart - first row in new global matrix generated
1960   */
1961   ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
1962   if (call == MAT_INITIAL_MATRIX) {
1963     aij = (Mat_SeqAIJ *) (*local)->data;
1964     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
1965     ii  = aij->i;
1966     jj  = aij->j;
1967 
1968     /*
1969         Determine the number of non-zeros in the diagonal and off-diagonal
1970         portions of the matrix in order to do correct preallocation
1971     */
1972 
1973     /* first get start and end of "diagonal" columns */
1974     if (csize == PETSC_DECIDE) {
1975       nlocal = n/size + ((n % size) > rank);
1976     } else {
1977       nlocal = csize;
1978     }
1979     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1980     rstart = rend - nlocal;
1981     if (rank == size - 1 && rend != n) {
1982       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
1983     }
1984 
1985     /* next, compute all the lengths */
1986     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
1987     olens = dlens + m;
1988     for ( i=0; i<m; i++ ) {
1989       jend = ii[i+1] - ii[i];
1990       olen = 0;
1991       dlen = 0;
1992       for ( j=0; j<jend; j++ ) {
1993         if ( *jj < rstart || *jj >= rend) olen++;
1994         else dlen++;
1995         jj++;
1996       }
1997       olens[i] = olen;
1998       dlens[i] = dlen;
1999     }
2000     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2001     PetscFree(dlens);
2002   } else {
2003     int ml,nl;
2004 
2005     M = *newmat;
2006     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2007     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2008     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2009   }
2010   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2011   aij = (Mat_SeqAIJ *) (*local)->data;
2012   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2013   ii  = aij->i;
2014   jj  = aij->j;
2015   aa  = aij->a;
2016   for (i=0; i<m; i++) {
2017     row   = rstart + i;
2018     nz    = ii[i+1] - ii[i];
2019     cwork = jj;     jj += nz;
2020     vwork = aa;     aa += nz;
2021     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2022   }
2023 
2024   ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
2025   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2026   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2027   *newmat = M;
2028   PetscFunctionReturn(0);
2029 }
2030