xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5cd905551efc76d042fcd2cc746dd1c50567705b)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.234 1998/04/03 23:15:06 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* Local utility routine that creates a mapping from the global column
12 number to the local column number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global number of columns.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
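/*
   Illustration (a minimal sketch, not part of the library code): with the map
   built above, a global column index gcol that occurs in the off-diagonal
   block B is translated to its local column number as colmap[gcol]-1; a
   stored value of 0 marks a column that does not occur in B on this process.

     int lcol = aij->colmap[gcol] - 1;
     if (lcol < 0) {
       ... gcol is not (yet) a column of the local off-diagonal part ...
     } else {
       ... lcol indexes B's local column space; aij->garray[lcol] == gcol ...
     }
*/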
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col1) high = t; \
46       else              low  = t; \
47     } \
48       for ( _i=low; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col1) high = t; \
117       else              low  = t; \
118     } \
119        for ( _i=low; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
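/*
   The two macros above share the same insertion logic.  The following is a
   rough, simplified sketch of that logic in plain C, kept here only as
   documentation (it assumes shift == 0 and ignores the binary-search
   narrowing and the CHUNKSIZE reallocation path):

     nrow = ailen[row];
     rp   = aj + ai[row];                           row's column indices
     ap   = aa + ai[row];                           row's values
     for (_i=0; _i<nrow; _i++) {
       if (rp[_i] > col) break;                     insertion point found
       if (rp[_i] == col) {                         existing nonzero
         if (addv == ADD_VALUES) ap[_i] += value; else ap[_i] = value;
         goto done;
       }
     }
     for (ii=nrow-1; ii>=_i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; }
     rp[_i] = col; ap[_i] = value; nrow++;
   done:
     ailen[row] = nrow;
*/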
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some variables required by the macros */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           }
238           else col = in[j];
239           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
240           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
241           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
242         }
243       }
244     }
245     else {
246       if (roworiented && !aij->donotstash) {
247         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
248       }
249       else {
250         if (!aij->donotstash) {
251           row = im[i];
252           for ( j=0; j<n; j++ ) {
253             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
254           }
255         }
256       }
257     }
258   }
259   PetscFunctionReturn(0);
260 }
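/*
   Usage sketch (illustrative only; 'mat' is assumed to be a matrix of type
   MATMPIAIJ and the indices are GLOBAL):

     int    row = ..., col = ...;
     Scalar v   = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries whose row is owned by this process go directly into the diagonal
   block A or the off-diagonal block B; entries for rows owned elsewhere are
   stashed and shipped to their owners during MatAssemblyBegin/MatAssemblyEnd.
*/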
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatGetValues_MPIAIJ"
264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
265 {
266   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
267   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
268   int        cstart = aij->cstart, cend = aij->cend,row,col;
269 
270   PetscFunctionBegin;
271   for ( i=0; i<m; i++ ) {
272     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
273     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
274     if (idxm[i] >= rstart && idxm[i] < rend) {
275       row = idxm[i] - rstart;
276       for ( j=0; j<n; j++ ) {
277         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
278         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
279         if (idxn[j] >= cstart && idxn[j] < cend){
280           col = idxn[j] - cstart;
281           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
282         }
283         else {
284           if (!aij->colmap) {
285             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
286           }
287           col = aij->colmap[idxn[j]] - 1;
288           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
289           else {
290             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
291           }
292         }
293       }
294     } else {
295       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
296     }
297   }
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   PetscFunctionBegin;
315   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
316   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
317   if (addv == (ADD_VALUES|INSERT_VALUES)) {
318     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
319   }
320   mat->insertmode = addv; /* in case this processor had no cache */
321 
322   /*  first count number of contributors to each processor */
323   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
324   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
325   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
326   for ( i=0; i<aij->stash.n; i++ ) {
327     idx = aij->stash.idx[i];
328     for ( j=0; j<size; j++ ) {
329       if (idx >= owners[j] && idx < owners[j+1]) {
330         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
331       }
332     }
333   }
334   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
335 
336   /* inform other processors of number of messages and max length */
337   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
338   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
339   nreceives = work[rank];
340   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
341   nmax = work[rank];
342   PetscFree(work);
343 
344   /* post receives:
345        1) each message consists of ordered triples
346      (global row index, global column index, value); we store the indices as
347      Scalars (doubles) to simplify the message passing.
348        2) since we don't know how long each individual message is, we
349      allocate the largest needed buffer for each receive. Potentially
350      this is a lot of wasted space.
351 
352 
353        This could be done better.
354   */
355   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
357   for ( i=0; i<nreceives; i++ ) {
358     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
359               comm,recv_waits+i);CHKERRQ(ierr);
360   }
361 
362   /* do sends:
363       1) starts[i] gives the starting index in svalues for stuff going to
364          the ith processor
365   */
366   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
367   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
368   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
369   starts[0]  = 0;
370   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
371   for ( i=0; i<aij->stash.n; i++ ) {
372     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
373     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
374     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
375   }
376   PetscFree(owner);
377   starts[0] = 0;
378   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   count = 0;
380   for ( i=0; i<size; i++ ) {
381     if (procs[i]) {
382       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
383     }
384   }
385   PetscFree(starts); PetscFree(nprocs);
386 
387   /* Free cache space */
388   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
389   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
390 
391   aij->svalues    = svalues;    aij->rvalues    = rvalues;
392   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
393   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
394   aij->rmax       = nmax;
395 
396   PetscFunctionReturn(0);
397 }
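/*
   For reference, a sketch of the wire format used above (one packed triple
   per stashed entry, all stored as Scalars):

     svalues[3*k]   = (Scalar) global row index
     svalues[3*k+1] = (Scalar) global column index
     svalues[3*k+2] =          numerical value

   MatAssemblyEnd_MPIAIJ below unpacks the triples with the inverse mapping.
*/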
398 extern int MatSetUpMultiply_MPIAIJ(Mat);
399 
400 #undef __FUNC__
401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
403 {
404   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
405   MPI_Status  *send_status,recv_status;
406   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
407   int         row,col,other_disassembled;
408   Scalar      *values,val;
409   InsertMode  addv = mat->insertmode;
410 
411   PetscFunctionBegin;
412   /*  wait on receives */
413   while (count) {
414     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
415     /* unpack receives into our local space */
416     values = aij->rvalues + 3*imdex*aij->rmax;
417     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
418     n = n/3;
419     for ( i=0; i<n; i++ ) {
420       row = (int) PetscReal(values[3*i]) - aij->rstart;
421       col = (int) PetscReal(values[3*i+1]);
422       val = values[3*i+2];
423       if (col >= aij->cstart && col < aij->cend) {
424         col -= aij->cstart;
425         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
426       } else {
427         if (mat->was_assembled || mat->assembled) {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
430           }
431           col = aij->colmap[col] - 1;
432           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
433             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
434             col = (int) PetscReal(values[3*i+1]);
435           }
436         }
437         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
438       }
439     }
440     count--;
441   }
442   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
443 
444   /* wait on sends */
445   if (aij->nsends) {
446     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
447     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
448     PetscFree(send_status);
449   }
450   PetscFree(aij->send_waits); PetscFree(aij->svalues);
451 
452   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
454 
455   /* determine if any processor has disassembled; if so, we must
456      also disassemble ourselves, in order that we may reassemble. */
457   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
458   if (mat->was_assembled && !other_disassembled) {
459     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460   }
461 
462   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
463     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
464   }
465   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
467 
468   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatZeroEntries_MPIAIJ"
474 int MatZeroEntries_MPIAIJ(Mat A)
475 {
476   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478 
479   PetscFunctionBegin;
480   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
481   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNC__
486 #define __FUNC__ "MatZeroRows_MPIAIJ"
487 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
488 {
489   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
490   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
491   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
492   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
493   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
494   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
495   MPI_Comm       comm = A->comm;
496   MPI_Request    *send_waits,*recv_waits;
497   MPI_Status     recv_status,*send_status;
498   IS             istmp;
499 
500   PetscFunctionBegin;
501   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
502   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
503 
504   /*  first count number of contributors to each processor */
505   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
506   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
507   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
508   for ( i=0; i<N; i++ ) {
509     idx = rows[i];
510     found = 0;
511     for ( j=0; j<size; j++ ) {
512       if (idx >= owners[j] && idx < owners[j+1]) {
513         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
514       }
515     }
516     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
517   }
518   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
519 
520   /* inform other processors of number of messages and max length */
521   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
522   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
523   nrecvs = work[rank];
524   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
525   nmax = work[rank];
526   PetscFree(work);
527 
528   /* post receives:   */
529   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
530   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
531   for ( i=0; i<nrecvs; i++ ) {
532     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
533   }
534 
535   /* do sends:
536       1) starts[i] gives the starting index in svalues for stuff going to
537          the ith processor
538   */
539   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
540   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
541   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
542   starts[0] = 0;
543   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
544   for ( i=0; i<N; i++ ) {
545     svalues[starts[owner[i]]++] = rows[i];
546   }
547   ISRestoreIndices(is,&rows);
548 
549   starts[0] = 0;
550   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
551   count = 0;
552   for ( i=0; i<size; i++ ) {
553     if (procs[i]) {
554       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
555     }
556   }
557   PetscFree(starts);
558 
559   base = owners[rank];
560 
561   /*  wait on receives */
562   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
563   source = lens + nrecvs;
564   count  = nrecvs; slen = 0;
565   while (count) {
566     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
567     /* unpack receives into our local space */
568     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
569     source[imdex]  = recv_status.MPI_SOURCE;
570     lens[imdex]  = n;
571     slen += n;
572     count--;
573   }
574   PetscFree(recv_waits);
575 
576   /* move the received data into the list of local rows to zero */
577   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
578   count = 0;
579   for ( i=0; i<nrecvs; i++ ) {
580     values = rvalues + i*nmax;
581     for ( j=0; j<lens[i]; j++ ) {
582       lrows[count++] = values[j] - base;
583     }
584   }
585   PetscFree(rvalues); PetscFree(lens);
586   PetscFree(owner); PetscFree(nprocs);
587 
588   /* actually zap the local rows */
589   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
590   PLogObjectParent(A,istmp);
591 
592   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
593   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
594   ierr = ISDestroy(istmp); CHKERRQ(ierr);
595 
596   if (diag) {
597     for ( i = 0; i < slen; i++ ) {
598       row = lrows[i] + rstart;
599       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
600     }
601     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
602     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
603   }
604   PetscFree(lrows);
605   /* wait on sends */
606   if (nsends) {
607     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
608     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
609     PetscFree(send_status);
610   }
611   PetscFree(send_waits); PetscFree(svalues);
612 
613   PetscFunctionReturn(0);
614 }
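/*
   Usage sketch (illustrative only, using the calling sequence of this PETSc
   version, MatZeroRows(Mat,IS,Scalar*), with GLOBAL row indices):

     int    rows[2] = {0, 5};
     Scalar one     = 1.0;
     IS     is;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(mat,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);

   Passing PETSC_NULL instead of &one leaves the zeroed rows without a
   diagonal entry, exactly as handled by the 'if (diag)' branch above.
*/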
615 
616 #undef __FUNC__
617 #define __FUNC__ "MatMult_MPIAIJ"
618 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
619 {
620   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
621   int        ierr,nt;
622 
623   PetscFunctionBegin;
624   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
625   if (nt != a->n) {
626     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
627   }
628   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
629   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
630   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
631   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
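/*
   In effect MatMult_MPIAIJ above computes (a sketch, not additional code):

     y = A_diag * x_local + B_off * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the
   scatter context a->Mvctx; the scatter is started before the local product
   so that communication overlaps with computation.
*/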
634 
635 #undef __FUNC__
636 #define __FUNC__ "MatMultAdd_MPIAIJ"
637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
638 {
639   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
640   int        ierr;
641 
642   PetscFunctionBegin;
643   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
644   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
645   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
646   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNC__
651 #define __FUNC__ "MatMultTrans_MPIAIJ"
652 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
653 {
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655   int        ierr;
656 
657   PetscFunctionBegin;
658   /* do nondiagonal part */
659   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
660   /* send it on its way */
661   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
662   /* do local part */
663   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
664   /* receive remote parts: note this assumes the values are not actually */
665   /* inserted in yy until the next line, which is true for my implementation */
666   /* but may not always be true. */
667   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
673 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
674 {
675   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676   int        ierr;
677 
678   PetscFunctionBegin;
679   /* do nondiagonal part */
680   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
681   /* send it on its way */
682   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
683   /* do local part */
684   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
685   /* receive remote parts: note this assumes the values are not actually */
686   /* inserted in zz until the next line, which is true for my implementation */
687   /* but may not always be true. */
688   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 /*
693   This only works correctly for square matrices where the subblock A->A is the
694    diagonal block
695 */
696 #undef __FUNC__
697 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
698 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
699 {
700   int        ierr;
701   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
702 
703   PetscFunctionBegin;
704   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
705   if (a->rstart != a->cstart || a->rend != a->cend) {
706     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
707   }
708   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatScale_MPIAIJ"
714 int MatScale_MPIAIJ(Scalar *aa,Mat A)
715 {
716   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
717   int        ierr;
718 
719   PetscFunctionBegin;
720   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
721   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNC__
726 #define __FUNC__ "MatDestroy_MPIAIJ"
727 int MatDestroy_MPIAIJ(Mat mat)
728 {
729   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
730   int        ierr;
731 
732   PetscFunctionBegin;
733 #if defined(USE_PETSC_LOG)
734   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
735 #endif
736   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
737   PetscFree(aij->rowners);
738   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
739   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
740   if (aij->colmap) PetscFree(aij->colmap);
741   if (aij->garray) PetscFree(aij->garray);
742   if (aij->lvec)   VecDestroy(aij->lvec);
743   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
744   if (aij->rowvalues) PetscFree(aij->rowvalues);
745   PetscFree(aij);
746   PLogObjectDestroy(mat);
747   PetscHeaderDestroy(mat);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNC__
752 #define __FUNC__ "MatView_MPIAIJ_Binary"
753 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
754 {
755   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
756   int         ierr;
757 
758   PetscFunctionBegin;
759   if (aij->size == 1) {
760     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
761   }
762   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
768 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
769 {
770   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
771   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
772   int         ierr, format,shift = C->indexshift,rank;
773   FILE        *fd;
774   ViewerType  vtype;
775 
776   PetscFunctionBegin;
777   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
778   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
779     ierr = ViewerGetFormat(viewer,&format);
780     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
781       MatInfo info;
782       int     flg;
783       MPI_Comm_rank(mat->comm,&rank);
784       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
785       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
786       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
787       PetscSequentialPhaseBegin(mat->comm,1);
788       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
789          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
790       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
791          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
792       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
793       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
794       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
795       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
796       fflush(fd);
797       PetscSequentialPhaseEnd(mat->comm,1);
798       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
799       PetscFunctionReturn(0);
800     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
801       PetscFunctionReturn(0);
802     }
803   }
804 
805   if (vtype == DRAW_VIEWER) {
806     Draw       draw;
807     PetscTruth isnull;
808     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
809     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
810   }
811 
812   if (vtype == ASCII_FILE_VIEWER) {
813     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
814     PetscSequentialPhaseBegin(mat->comm,1);
815     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
816            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
817            aij->cend);
818     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
819     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
820     fflush(fd);
821     PetscSequentialPhaseEnd(mat->comm,1);
822   } else {
823     int size = aij->size;
824     rank = aij->rank;
825     if (size == 1) {
826       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
827     } else {
828       /* assemble the entire matrix onto first processor. */
829       Mat         A;
830       Mat_SeqAIJ *Aloc;
831       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
832       Scalar      *a;
833 
834       if (!rank) {
835         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
836       } else {
837         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
838       }
839       PLogObjectParent(mat,A);
840 
841       /* copy over the A part */
842       Aloc = (Mat_SeqAIJ*) aij->A->data;
843       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
844       row = aij->rstart;
845       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
846       for ( i=0; i<m; i++ ) {
847         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
848         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
849       }
850       aj = Aloc->j;
851       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
852 
853       /* copy over the B part */
854       Aloc = (Mat_SeqAIJ*) aij->B->data;
855       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
856       row = aij->rstart;
857       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
858       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
859       for ( i=0; i<m; i++ ) {
860         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
861         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
862       }
863       PetscFree(ct);
864       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
865       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
866       /*
867          Everyone has to call MatView when drawing the matrix since the graphics waits are
868          synchronized across all processors that share the Draw object
869       */
870       if (!rank || vtype == DRAW_VIEWER) {
871         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
872       }
873       ierr = MatDestroy(A); CHKERRQ(ierr);
874     }
875   }
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNC__
880 #define __FUNC__ "MatView_MPIAIJ"
881 int MatView_MPIAIJ(Mat mat,Viewer viewer)
882 {
883   int         ierr;
884   ViewerType  vtype;
885 
886   PetscFunctionBegin;
887   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
888   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
889       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
890     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
891   } else if (vtype == BINARY_FILE_VIEWER) {
892     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
893   } else {
894     SETERRQ(1,1,"Viewer type not supported by PETSc object");
895   }
896   PetscFunctionReturn(0);
897 }
898 
899 /*
900     This has to provide several versions.
901 
902      2) a) use only local smoothing, updating outer values only once.
903         b) local smoothing, updating outer values each inner iteration.
904      3) coloring, updating outer values between colors.
905 */
906 #undef __FUNC__
907 #define __FUNC__ "MatRelax_MPIAIJ"
908 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
909                            double fshift,int its,Vec xx)
910 {
911   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
912   Mat        AA = mat->A, BB = mat->B;
913   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
914   Scalar     *b,*x,*xs,*ls,d,*v,sum;
915   int        ierr,*idx, *diag;
916   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
917 
918   PetscFunctionBegin;
919   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
920   xs = x + shift; /* shift by one for index start of 1 */
921   ls = ls + shift;
922   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
923   diag = A->diag;
924   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
925     if (flag & SOR_ZERO_INITIAL_GUESS) {
926       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
927       PetscFunctionReturn(0);
928     }
929     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
930     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
931     while (its--) {
932       /* go down through the rows */
933       for ( i=0; i<m; i++ ) {
934         n    = A->i[i+1] - A->i[i];
935         idx  = A->j + A->i[i] + shift;
936         v    = A->a + A->i[i] + shift;
937         sum  = b[i];
938         SPARSEDENSEMDOT(sum,xs,v,idx,n);
939         d    = fshift + A->a[diag[i]+shift];
940         n    = B->i[i+1] - B->i[i];
941         idx  = B->j + B->i[i] + shift;
942         v    = B->a + B->i[i] + shift;
943         SPARSEDENSEMDOT(sum,ls,v,idx,n);
944         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
945       }
946       /* come up through the rows */
947       for ( i=m-1; i>-1; i-- ) {
948         n    = A->i[i+1] - A->i[i];
949         idx  = A->j + A->i[i] + shift;
950         v    = A->a + A->i[i] + shift;
951         sum  = b[i];
952         SPARSEDENSEMDOT(sum,xs,v,idx,n);
953         d    = fshift + A->a[diag[i]+shift];
954         n    = B->i[i+1] - B->i[i];
955         idx  = B->j + B->i[i] + shift;
956         v    = B->a + B->i[i] + shift;
957         SPARSEDENSEMDOT(sum,ls,v,idx,n);
958         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
959       }
960     }
961   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
962     if (flag & SOR_ZERO_INITIAL_GUESS) {
963       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
964       PetscFunctionReturn(0);
965     }
966     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
967     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
968     while (its--) {
969       for ( i=0; i<m; i++ ) {
970         n    = A->i[i+1] - A->i[i];
971         idx  = A->j + A->i[i] + shift;
972         v    = A->a + A->i[i] + shift;
973         sum  = b[i];
974         SPARSEDENSEMDOT(sum,xs,v,idx,n);
975         d    = fshift + A->a[diag[i]+shift];
976         n    = B->i[i+1] - B->i[i];
977         idx  = B->j + B->i[i] + shift;
978         v    = B->a + B->i[i] + shift;
979         SPARSEDENSEMDOT(sum,ls,v,idx,n);
980         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
981       }
982     }
983   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
984     if (flag & SOR_ZERO_INITIAL_GUESS) {
985       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
986       PetscFunctionReturn(0);
987     }
988     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
989                             mat->Mvctx); CHKERRQ(ierr);
990     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
991                             mat->Mvctx); CHKERRQ(ierr);
992     while (its--) {
993       for ( i=m-1; i>-1; i-- ) {
994         n    = A->i[i+1] - A->i[i];
995         idx  = A->j + A->i[i] + shift;
996         v    = A->a + A->i[i] + shift;
997         sum  = b[i];
998         SPARSEDENSEMDOT(sum,xs,v,idx,n);
999         d    = fshift + A->a[diag[i]+shift];
1000         n    = B->i[i+1] - B->i[i];
1001         idx  = B->j + B->i[i] + shift;
1002         v    = B->a + B->i[i] + shift;
1003         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1004         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1005       }
1006     }
1007   } else {
1008     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1009   }
1010   PetscFunctionReturn(0);
1011 }
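/*
   For reference, a sketch of the pointwise update performed above (assuming
   SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts sum_k v[k]*x[idx[k]] from sum):

     x_i <- (1-omega)*x_i + omega*(b_i - sum_j a_ij*x_j + a_ii*x_i)/(a_ii + fshift)

   where the row sum runs over the diagonal block A with the local values of x
   and over the off-diagonal block B with the ghosted values in mat->lvec.
*/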
1012 
1013 #undef __FUNC__
1014 #define __FUNC__ "MatGetInfo_MPIAIJ"
1015 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1016 {
1017   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1018   Mat        A = mat->A, B = mat->B;
1019   int        ierr;
1020   double     isend[5], irecv[5];
1021 
1022   PetscFunctionBegin;
1023   info->block_size     = 1.0;
1024   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1025   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1026   isend[3] = info->memory;  isend[4] = info->mallocs;
1027   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1028   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1029   isend[3] += info->memory;  isend[4] += info->mallocs;
1030   if (flag == MAT_LOCAL) {
1031     info->nz_used      = isend[0];
1032     info->nz_allocated = isend[1];
1033     info->nz_unneeded  = isend[2];
1034     info->memory       = isend[3];
1035     info->mallocs      = isend[4];
1036   } else if (flag == MAT_GLOBAL_MAX) {
1037     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1038     info->nz_used      = irecv[0];
1039     info->nz_allocated = irecv[1];
1040     info->nz_unneeded  = irecv[2];
1041     info->memory       = irecv[3];
1042     info->mallocs      = irecv[4];
1043   } else if (flag == MAT_GLOBAL_SUM) {
1044     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1045     info->nz_used      = irecv[0];
1046     info->nz_allocated = irecv[1];
1047     info->nz_unneeded  = irecv[2];
1048     info->memory       = irecv[3];
1049     info->mallocs      = irecv[4];
1050   }
1051   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1052   info->fill_ratio_needed = 0;
1053   info->factor_mallocs    = 0;
1054   info->rows_global       = (double)mat->M;
1055   info->columns_global    = (double)mat->N;
1056   info->rows_local        = (double)mat->m;
1057   info->columns_local     = (double)mat->N;
1058 
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNC__
1063 #define __FUNC__ "MatSetOption_MPIAIJ"
1064 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1065 {
1066   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1067 
1068   PetscFunctionBegin;
1069   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1070       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1071       op == MAT_COLUMNS_UNSORTED ||
1072       op == MAT_COLUMNS_SORTED ||
1073       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1074       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1075         MatSetOption(a->A,op);
1076         MatSetOption(a->B,op);
1077   } else if (op == MAT_ROW_ORIENTED) {
1078     a->roworiented = 1;
1079     MatSetOption(a->A,op);
1080     MatSetOption(a->B,op);
1081   } else if (op == MAT_ROWS_SORTED ||
1082              op == MAT_ROWS_UNSORTED ||
1083              op == MAT_SYMMETRIC ||
1084              op == MAT_STRUCTURALLY_SYMMETRIC ||
1085              op == MAT_YES_NEW_DIAGONALS)
1086     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1087   else if (op == MAT_COLUMN_ORIENTED) {
1088     a->roworiented = 0;
1089     MatSetOption(a->A,op);
1090     MatSetOption(a->B,op);
1091   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1092     a->donotstash = 1;
1093   } else if (op == MAT_NO_NEW_DIAGONALS){
1094     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1095   } else {
1096     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1097   }
1098   PetscFunctionReturn(0);
1099 }
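/*
   Usage sketch (illustrative only): turning off the stashing of off-process
   entries when the caller guarantees it only sets locally owned rows:

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);

   Most other options are simply forwarded to the sequential blocks a->A and
   a->B, as done above.
*/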
1100 
1101 #undef __FUNC__
1102 #define __FUNC__ "MatGetSize_MPIAIJ"
1103 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1104 {
1105   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1106 
1107   PetscFunctionBegin;
1108   if (m) *m = mat->M;
1109   if (n) *n = mat->N;
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNC__
1114 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1115 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1116 {
1117   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1118 
1119   PetscFunctionBegin;
1120   if (m) *m = mat->m;
1121   if (n) *n = mat->n;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNC__
1126 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1127 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1128 {
1129   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1130 
1131   PetscFunctionBegin;
1132   *m = mat->rstart; *n = mat->rend;
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1137 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1138 
1139 #undef __FUNC__
1140 #define __FUNC__ "MatGetRow_MPIAIJ"
1141 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1142 {
1143   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1144   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1145   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1146   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1147   int        *cmap, *idx_p;
1148 
1149   PetscFunctionBegin;
1150   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1151   mat->getrowactive = PETSC_TRUE;
1152 
1153   if (!mat->rowvalues && (idx || v)) {
1154     /*
1155         allocate enough space to hold information from the longest row.
1156     */
1157     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1158     int     max = 1,m = mat->m,tmp;
1159     for ( i=0; i<m; i++ ) {
1160       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1161       if (max < tmp) { max = tmp; }
1162     }
1163     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1164     CHKPTRQ(mat->rowvalues);
1165     mat->rowindices = (int *) (mat->rowvalues + max);
1166   }
1167 
1168   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1169   lrow = row - rstart;
1170 
1171   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1172   if (!v)   {pvA = 0; pvB = 0;}
1173   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1174   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1175   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1176   nztot = nzA + nzB;
1177 
1178   cmap  = mat->garray;
1179   if (v  || idx) {
1180     if (nztot) {
1181       /* Sort by increasing column numbers, assuming A and B already sorted */
1182       int imark = -1;
1183       if (v) {
1184         *v = v_p = mat->rowvalues;
1185         for ( i=0; i<nzB; i++ ) {
1186           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1187           else break;
1188         }
1189         imark = i;
1190         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1191         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1192       }
1193       if (idx) {
1194         *idx = idx_p = mat->rowindices;
1195         if (imark > -1) {
1196           for ( i=0; i<imark; i++ ) {
1197             idx_p[i] = cmap[cworkB[i]];
1198           }
1199         } else {
1200           for ( i=0; i<nzB; i++ ) {
1201             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1202             else break;
1203           }
1204           imark = i;
1205         }
1206         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1207         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1208       }
1209     }
1210     else {
1211       if (idx) *idx = 0;
1212       if (v)   *v   = 0;
1213     }
1214   }
1215   *nz = nztot;
1216   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1217   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 #undef __FUNC__
1222 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1223 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1224 {
1225   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1226 
1227   PetscFunctionBegin;
1228   if (aij->getrowactive == PETSC_FALSE) {
1229     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1230   }
1231   aij->getrowactive = PETSC_FALSE;
1232   PetscFunctionReturn(0);
1233 }
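/*
   Usage sketch (illustrative only): extracting the locally owned rows with
   global column indices, as assembled by MatGetRow_MPIAIJ above:

     int    rstart,rend,row,nz,*cols;
     Scalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz entries cols[0..nz-1], vals[0..nz-1] ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }

   Only one row may be obtained at a time (see the getrowactive flag above).
*/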
1234 
1235 #undef __FUNC__
1236 #define __FUNC__ "MatNorm_MPIAIJ"
1237 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1238 {
1239   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1240   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1241   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1242   double     sum = 0.0;
1243   Scalar     *v;
1244 
1245   PetscFunctionBegin;
1246   if (aij->size == 1) {
1247     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1248   } else {
1249     if (type == NORM_FROBENIUS) {
1250       v = amat->a;
1251       for (i=0; i<amat->nz; i++ ) {
1252 #if defined(USE_PETSC_COMPLEX)
1253         sum += real(conj(*v)*(*v)); v++;
1254 #else
1255         sum += (*v)*(*v); v++;
1256 #endif
1257       }
1258       v = bmat->a;
1259       for (i=0; i<bmat->nz; i++ ) {
1260 #if defined(USE_PETSC_COMPLEX)
1261         sum += real(conj(*v)*(*v)); v++;
1262 #else
1263         sum += (*v)*(*v); v++;
1264 #endif
1265       }
1266       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1267       *norm = sqrt(*norm);
1268     } else if (type == NORM_1) { /* max column norm */
1269       double *tmp, *tmp2;
1270       int    *jj, *garray = aij->garray;
1271       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1272       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1273       PetscMemzero(tmp,aij->N*sizeof(double));
1274       *norm = 0.0;
1275       v = amat->a; jj = amat->j;
1276       for ( j=0; j<amat->nz; j++ ) {
1277         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1278       }
1279       v = bmat->a; jj = bmat->j;
1280       for ( j=0; j<bmat->nz; j++ ) {
1281         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1282       }
1283       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1284       for ( j=0; j<aij->N; j++ ) {
1285         if (tmp2[j] > *norm) *norm = tmp2[j];
1286       }
1287       PetscFree(tmp); PetscFree(tmp2);
1288     } else if (type == NORM_INFINITY) { /* max row norm */
1289       double ntemp = 0.0;
1290       for ( j=0; j<amat->m; j++ ) {
1291         v = amat->a + amat->i[j] + shift;
1292         sum = 0.0;
1293         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1294           sum += PetscAbsScalar(*v); v++;
1295         }
1296         v = bmat->a + bmat->i[j] + shift;
1297         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1298           sum += PetscAbsScalar(*v); v++;
1299         }
1300         if (sum > ntemp) ntemp = sum;
1301       }
1302       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1303     } else {
1304       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1305     }
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNC__
1311 #define __FUNC__ "MatTranspose_MPIAIJ"
1312 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1313 {
1314   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1315   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1316   int        ierr,shift = Aloc->indexshift;
1317   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1318   Mat        B;
1319   Scalar     *array;
1320 
1321   PetscFunctionBegin;
1322   if (matout == PETSC_NULL && M != N) {
1323     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1324   }
1325 
1326   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1327 
1328   /* copy over the A part */
1329   Aloc = (Mat_SeqAIJ*) a->A->data;
1330   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1331   row = a->rstart;
1332   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1333   for ( i=0; i<m; i++ ) {
1334     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1335     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1336   }
1337   aj = Aloc->j;
1338   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1339 
1340   /* copy over the B part */
1341   Aloc = (Mat_SeqAIJ*) a->B->data;
1342   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1343   row = a->rstart;
1344   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1345   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1346   for ( i=0; i<m; i++ ) {
1347     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1348     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1349   }
1350   PetscFree(ct);
1351   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1352   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1353   if (matout != PETSC_NULL) {
1354     *matout = B;
1355   } else {
1356     PetscOps       *Abops;
1357     struct _MatOps *Aops;
1358 
1359     /* This isn't really an in-place transpose, but we free the data structures from a */
1360     PetscFree(a->rowners);
1361     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1362     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1363     if (a->colmap) PetscFree(a->colmap);
1364     if (a->garray) PetscFree(a->garray);
1365     if (a->lvec) VecDestroy(a->lvec);
1366     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1367     PetscFree(a);
1368 
1369     /*
1370        This is horrible, horrible code. We need to keep the
1371       A pointers for the bops and ops but copy everything
1372       else from B.
1373     */
1374     Abops = A->bops;
1375     Aops  = A->ops;
1376     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1377     A->bops = Abops;
1378     A->ops  = Aops;
1379     PetscHeaderDestroy(B);
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNC__
1385 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1387 {
1388   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1389   Mat a = aij->A, b = aij->B;
1390   int ierr,s1,s2,s3;
1391 
1392   PetscFunctionBegin;
1393   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1394   if (rr) {
1395     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1396     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1397     /* Overlap communication with computation. */
1398     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1399   }
1400   if (ll) {
1401     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1402     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1403     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1404   }
1405   /* scale the diagonal block */
1406   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1407 
1408   if (rr) {
1409     /* Do a scatter end and then right scale the off-diagonal block */
1410     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1411     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1412   }
1413 
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 
1418 extern int MatPrintHelp_SeqAIJ(Mat);
1419 #undef __FUNC__
1420 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1421 int MatPrintHelp_MPIAIJ(Mat A)
1422 {
1423   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1424   int        ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!a->rank) {
1428     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNC__
1434 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1435 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1436 {
1437   PetscFunctionBegin;
1438   *bs = 1;
1439   PetscFunctionReturn(0);
1440 }
1441 #undef __FUNC__
1442 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1443 int MatSetUnfactored_MPIAIJ(Mat A)
1444 {
1445   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1446   int        ierr;
1447 
1448   PetscFunctionBegin;
1449   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNC__
1454 #define __FUNC__ "MatEqual_MPIAIJ"
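/*
   Two MPIAIJ matrices are equal if, on every process, both the diagonal and
   the off-diagonal blocks are equal; the local results are AND-reduced over
   the communicator.
*/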
1455 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1456 {
1457   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1458   Mat        a, b, c, d;
1459   PetscTruth flg;
1460   int        ierr;
1461 
1462   PetscFunctionBegin;
1463   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1464   a = matA->A; b = matA->B;
1465   c = matB->A; d = matB->B;
1466 
1467   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1468   if (flg == PETSC_TRUE) {
1469     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1470   }
1471   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1476 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1477 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1478 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1479 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1480 
1481 /* -------------------------------------------------------------------*/
1482 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1483        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1484        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1485        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1486        0,0,
1487        0,0,
1488        0,0,
1489        MatRelax_MPIAIJ,
1490        MatTranspose_MPIAIJ,
1491        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1492        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1493        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1494        0,
1495        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1496        0,0,0,0,
1497        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1498        0,0,
1499        0,0,MatConvertSameType_MPIAIJ,0,0,
1500        0,0,0,
1501        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1502        MatPrintHelp_MPIAIJ,
1503        MatScale_MPIAIJ,0,0,0,
1504        MatGetBlockSize_MPIAIJ,0,0,0,0,
1505        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1506        0,0,MatGetSubMatrix_MPIAIJ};
1507 
1508 
1509 #undef __FUNC__
1510 #define __FUNC__ "MatCreateMPIAIJ"
1511 /*@C
1512    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1513    (the default parallel PETSc format).  For good matrix assembly performance
1514    the user should preallocate the matrix storage by setting the parameters
1515    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1516    performance can be increased by more than a factor of 50.
1517 
1518    Input Parameters:
1519 .  comm - MPI communicator
1520 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1521            This value should be the same as the local size used in creating the
1522            y vector for the matrix-vector product y = Ax.
1523 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1524        This value should be the same as the local size used in creating the
1525        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1526 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1527 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1528 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1529            (same for all local rows)
1530 .  d_nnz - array containing the number of nonzeros in the various rows of the
1531            diagonal portion of the local submatrix (possibly different for each row)
1532            or PETSC_NULL. You must leave room for the diagonal entry even if
1533            it is zero.
1534 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1535            submatrix (same for all local rows).
1536 .  o_nnz - array containing the number of nonzeros in the various rows of the
1537            off-diagonal portion of the local submatrix (possibly different for
1538            each row) or PETSC_NULL.
1539 
1540    Output Parameter:
1541 .  A - the matrix
1542 
1543    Notes:
1544    The AIJ format (also called the Yale sparse matrix format or
1545    compressed row storage), is fully compatible with standard Fortran 77
1546    storage.  That is, the stored row and column indices can begin at
1547    either one (as in Fortran) or zero.  See the users manual for details.
1548 
1549    The user MUST specify either the local or global matrix dimensions
1550    (possibly both).
1551 
1552    By default, this format uses inodes (identical nodes) when possible.
1553    We search for consecutive rows with the same nonzero structure, thereby
1554    reusing matrix information to achieve increased efficiency.
1555 
1556    Options Database Keys:
1557 $    -mat_aij_no_inode  - Do not use inodes
1558 $    -mat_aij_inode_limit <limit> - Set inode limit.
1559 $        (max limit=5)
1560 $    -mat_aij_oneindex - Internally use indexing starting at 1
1561 $        rather than 0.  Note: When calling MatSetValues(),
1562 $        the user still MUST index entries starting at 0!
1563 
1564    Storage Information:
1565    For a square global matrix we define each processor's diagonal portion
1566    to be its local rows and the corresponding columns (a square submatrix);
1567    each processor's off-diagonal portion encompasses the remainder of the
1568    local matrix (a rectangular submatrix).
1569 
1570    The user can specify preallocated storage for the diagonal part of
1571    the local submatrix with either d_nz or d_nnz (not both).  Set
1572    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1573    memory allocation.  Likewise, specify preallocated storage for the
1574    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1575 
1576    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1577    the figure below we depict these three local rows and all columns (0-11).
1578 
1579 $          0 1 2 3 4 5 6 7 8 9 10 11
1580 $         -------------------
1581 $  row 3  |  o o o d d d o o o o o o
1582 $  row 4  |  o o o d d d o o o o o o
1583 $  row 5  |  o o o d d d o o o o o o
1584 $         -------------------
1585 $
1586 
1587    Thus, any entries in the d locations are stored in the d (diagonal)
1588    submatrix, and any entries in the o locations are stored in the
1589    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1590    stored simply in the MATSEQAIJ format for compressed row storage.
1591 
1592    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1593    and o_nz should indicate the number of nonzeros per row in the o matrix.
1594    In general, for PDE problems in which most nonzeros are near the diagonal,
1595    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1596    or you will get TERRIBLE performance; see the users' manual chapter on
1597    matrices.
1598 
1599 .keywords: matrix, aij, compressed row, sparse, parallel
1600 
1601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1602 @*/
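/*
   A minimal calling sketch (values are illustrative only): create a square
   100 x 100 matrix over PETSC_COMM_WORLD, preallocating roughly 5 nonzeros
   per row in the diagonal block and 2 per row in the off-diagonal block:

       Mat A;
       int ierr;
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                              5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
*/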
1603 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1604 {
1605   Mat          B;
1606   Mat_MPIAIJ   *b;
1607   int          ierr, i,sum[2],work[2],size;
1608 
1609   PetscFunctionBegin;
1610   MPI_Comm_size(comm,&size);
1611   if (size == 1) {
1612     if (M == PETSC_DECIDE) M = m;
1613     if (N == PETSC_DECIDE) N = n;
1614     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1615     PetscFunctionReturn(0);
1616   }
1617 
1618   *A = 0;
1619   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1620   PLogObjectCreate(B);
1621   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1622   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1623   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1624   B->ops->destroy    = MatDestroy_MPIAIJ;
1625   B->ops->view       = MatView_MPIAIJ;
1626   B->factor     = 0;
1627   B->assembled  = PETSC_FALSE;
1628   B->mapping    = 0;
1629 
1630   B->insertmode = NOT_SET_VALUES;
1631   b->size       = size;
1632   MPI_Comm_rank(comm,&b->rank);
1633 
1634   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1635     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1636   }
1637 
1638   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1639     work[0] = m; work[1] = n;
1640     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1641     if (M == PETSC_DECIDE) M = sum[0];
1642     if (N == PETSC_DECIDE) N = sum[1];
1643   }
1644   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1645   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1646   b->m = m; B->m = m;
1647   b->n = n; B->n = n;
1648   b->N = N; B->N = N;
1649   b->M = M; B->M = M;
1650 
1651   /* build local table of row and column ownerships */
1652   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1653   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1654   b->cowners = b->rowners + b->size + 2;
1655   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1656   b->rowners[0] = 0;
1657   for ( i=2; i<=b->size; i++ ) {
1658     b->rowners[i] += b->rowners[i-1];
1659   }
1660   b->rstart = b->rowners[b->rank];
1661   b->rend   = b->rowners[b->rank+1];
1662   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1663   b->cowners[0] = 0;
1664   for ( i=2; i<=b->size; i++ ) {
1665     b->cowners[i] += b->cowners[i-1];
1666   }
1667   b->cstart = b->cowners[b->rank];
1668   b->cend   = b->cowners[b->rank+1];
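  /*
     Illustrative example: with 3 processes and local row counts 4, 3, 3 the
     Allgather plus running sum above gives rowners = {0,4,7,10}, so process 1
     owns global rows [4,7); cowners is built the same way from the local
     column counts.
  */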
1669 
1670   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1671   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1672   PLogObjectParent(B,b->A);
1673   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1674   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1675   PLogObjectParent(B,b->B);
1676 
1677   /* build cache (stash) for matrix entries destined for other processors */
1678   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1679   b->donotstash  = 0;
1680   b->colmap      = 0;
1681   b->garray      = 0;
1682   b->roworiented = 1;
1683 
1684   /* stuff used for matrix vector multiply */
1685   b->lvec      = 0;
1686   b->Mvctx     = 0;
1687 
1688   /* stuff for MatGetRow() */
1689   b->rowindices   = 0;
1690   b->rowvalues    = 0;
1691   b->getrowactive = PETSC_FALSE;
1692 
1693   *A = B;
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNC__
1698 #define __FUNC__ "MatConvertSameType_MPIAIJ"
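/*
   Duplicates an MPIAIJ matrix: the parallel layout, ownership tables, column
   map, garray, local work vector and scatter context are copied, and the two
   sequential blocks A and B are converted with MatConvert().
*/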
1699 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1700 {
1701   Mat        mat;
1702   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1703   int        ierr, len=0, flg;
1704 
1705   PetscFunctionBegin;
1706   *newmat       = 0;
1707   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1708   PLogObjectCreate(mat);
1709   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1710   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1711   mat->ops->destroy    = MatDestroy_MPIAIJ;
1712   mat->ops->view       = MatView_MPIAIJ;
1713   mat->factor     = matin->factor;
1714   mat->assembled  = PETSC_TRUE;
1715 
1716   a->m = mat->m   = oldmat->m;
1717   a->n = mat->n   = oldmat->n;
1718   a->M = mat->M   = oldmat->M;
1719   a->N = mat->N   = oldmat->N;
1720 
1721   a->rstart       = oldmat->rstart;
1722   a->rend         = oldmat->rend;
1723   a->cstart       = oldmat->cstart;
1724   a->cend         = oldmat->cend;
1725   a->size         = oldmat->size;
1726   a->rank         = oldmat->rank;
1727   mat->insertmode = NOT_SET_VALUES;
1728   a->rowvalues    = 0;
1729   a->getrowactive = PETSC_FALSE;
1730 
1731   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1732   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1733   a->cowners = a->rowners + a->size + 2;
1734   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1735   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1736   if (oldmat->colmap) {
1737     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1738     PLogObjectMemory(mat,(a->N)*sizeof(int));
1739     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1740   } else a->colmap = 0;
1741   if (oldmat->garray) {
1742     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1743     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1744     PLogObjectMemory(mat,len*sizeof(int));
1745     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1746   } else a->garray = 0;
1747 
1748   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1749   PLogObjectParent(mat,a->lvec);
1750   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1751   PLogObjectParent(mat,a->Mvctx);
1752   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1753   PLogObjectParent(mat,a->A);
1754   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1755   PLogObjectParent(mat,a->B);
1756   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1757   if (flg) {
1758     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1759   }
1760   *newmat = mat;
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #include "sys.h"
1765 
1766 #undef __FUNC__
1767 #define __FUNC__ "MatLoad_MPIAIJ"
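/*
   Loads a matrix from a PETSc binary viewer: process 0 reads the header, the
   row lengths, and each process's column indices and numerical values,
   shipping the remote pieces with MPI_Send; every process then preallocates,
   inserts its own rows, and the matrix is assembled.
*/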
1768 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1769 {
1770   Mat          A;
1771   Scalar       *vals,*svals;
1772   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1773   MPI_Status   status;
1774   int          i, nz, ierr, j,rstart, rend, fd;
1775   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1776   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1777   int          tag = ((PetscObject)viewer)->tag;
1778 
1779   PetscFunctionBegin;
1780   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1781   if (!rank) {
1782     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1783     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1784     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1785     if (header[3] < 0) {
1786       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1787     }
1788   }
1789 
1790 
1791   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1792   M = header[1]; N = header[2];
1793   /* determine ownership of all rows */
1794   m = M/size + ((M % size) > rank);
1795   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1796   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1797   rowners[0] = 0;
1798   for ( i=2; i<=size; i++ ) {
1799     rowners[i] += rowners[i-1];
1800   }
1801   rstart = rowners[rank];
1802   rend   = rowners[rank+1];
1803 
1804   /* distribute row lengths to all processors */
1805   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1806   offlens = ourlens + (rend-rstart);
1807   if (!rank) {
1808     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1809     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1810     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1811     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1812     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1813     PetscFree(sndcounts);
1814   } else {
1815     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1816   }
1817 
1818   if (!rank) {
1819     /* calculate the number of nonzeros on each processor */
1820     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1821     PetscMemzero(procsnz,size*sizeof(int));
1822     for ( i=0; i<size; i++ ) {
1823       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1824         procsnz[i] += rowlengths[j];
1825       }
1826     }
1827     PetscFree(rowlengths);
1828 
1829     /* determine max buffer needed and allocate it */
1830     maxnz = 0;
1831     for ( i=0; i<size; i++ ) {
1832       maxnz = PetscMax(maxnz,procsnz[i]);
1833     }
1834     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1835 
1836     /* read in my part of the matrix column indices  */
1837     nz     = procsnz[0];
1838     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1839     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1840 
1841     /* read in everyone else's part and ship it off */
1842     for ( i=1; i<size; i++ ) {
1843       nz   = procsnz[i];
1844       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1845       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1846     }
1847     PetscFree(cols);
1848   } else {
1849     /* determine buffer space needed for message */
1850     nz = 0;
1851     for ( i=0; i<m; i++ ) {
1852       nz += ourlens[i];
1853     }
1854     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1855 
1856     /* receive message of column indices */
1857     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1858     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1859     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1860   }
1861 
1862   /* loop over local rows, determining the number of off-diagonal entries */
1863   PetscMemzero(offlens,m*sizeof(int));
1864   jj = 0;
1865   for ( i=0; i<m; i++ ) {
1866     for ( j=0; j<ourlens[i]; j++ ) {
1867       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1868       jj++;
1869     }
1870   }
1871 
1872   /* create our matrix */
1873   for ( i=0; i<m; i++ ) {
1874     ourlens[i] -= offlens[i];
1875   }
1876   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1877   A = *newmat;
1878   MatSetOption(A,MAT_COLUMNS_SORTED);
1879   for ( i=0; i<m; i++ ) {
1880     ourlens[i] += offlens[i];
1881   }
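  /* ourlens[] again holds the total length of each local row, which is used
     as the column count when inserting the values below */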
1882 
1883   if (!rank) {
1884     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1885 
1886     /* read in my part of the matrix numerical values  */
1887     nz = procsnz[0];
1888     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1889 
1890     /* insert into matrix */
1891     jj      = rstart;
1892     smycols = mycols;
1893     svals   = vals;
1894     for ( i=0; i<m; i++ ) {
1895       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1896       smycols += ourlens[i];
1897       svals   += ourlens[i];
1898       jj++;
1899     }
1900 
1901     /* read in the other processors' values and ship them out */
1902     for ( i=1; i<size; i++ ) {
1903       nz   = procsnz[i];
1904       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1905       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1906     }
1907     PetscFree(procsnz);
1908   } else {
1909     /* receive numeric values */
1910     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1911 
1912     /* receive message of values */
1913     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1914     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1915     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1916 
1917     /* insert into matrix */
1918     jj      = rstart;
1919     smycols = mycols;
1920     svals   = vals;
1921     for ( i=0; i<m; i++ ) {
1922       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1923       smycols += ourlens[i];
1924       svals   += ourlens[i];
1925       jj++;
1926     }
1927   }
1928   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1929 
1930   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1931   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNC__
1936 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1937 /*
1938     Not great since it makes two copies of the submatrix: first a SeqAIJ
1939   in *local, and then the final result by concatenating the local matrices.
1940   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1941 */
1942 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1943 {
1944   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1945   Mat        *local,M;
1946   Scalar     *vwork,*aa;
1947   MPI_Comm   comm = mat->comm;
1948   Mat_SeqAIJ *aij;
1949   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1950 
1951   PetscFunctionBegin;
1952   MPI_Comm_rank(comm,&rank);
1953   MPI_Comm_size(comm,&size);
1954 
1955   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1956 
1957   /*
1958       m - number of local rows
1959       n - number of columns (same on all processors)
1960       rstart - first row in new global matrix generated
1961   */
1962   ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
1963   if (call == MAT_INITIAL_MATRIX) {
1964     aij = (Mat_SeqAIJ *) (*local)->data;
1965     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
1966     ii  = aij->i;
1967     jj  = aij->j;
1968 
1969     /*
1970         Determine the number of non-zeros in the diagonal and off-diagonal
1971         portions of the matrix in order to do correct preallocation
1972     */
1973 
1974     /* first get start and end of "diagonal" columns */
1975     if (csize == PETSC_DECIDE) {
1976       nlocal = n/size + ((n % size) > rank);
1977     } else {
1978       nlocal = csize;
1979     }
1980     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1981     rstart = rend - nlocal;
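    /* illustrative example: with 3 processes and nlocal = 4, 3, 3 the
       inclusive scan gives rend = 4, 7, 10 and hence rstart = 0, 4, 7 */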
1982     if (rank == size - 1 && rend != n) {
1983       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
1984     }
1985 
1986     /* next, compute all the lengths */
1987     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
1988     olens = dlens + m;
1989     for ( i=0; i<m; i++ ) {
1990       jend = ii[i+1] - ii[i];
1991       olen = 0;
1992       dlen = 0;
1993       for ( j=0; j<jend; j++ ) {
1994         if ( *jj < rstart || *jj >= rend) olen++;
1995         else dlen++;
1996         jj++;
1997       }
1998       olens[i] = olen;
1999       dlens[i] = dlen;
2000     }
2001     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2002     PetscFree(dlens);
2003   } else {
2004     int ml,nl;
2005 
2006     M = *newmat;
2007     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2008     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2009     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2010   }
2011   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2012   aij = (Mat_SeqAIJ *) (*local)->data;
2013   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2014   ii  = aij->i;
2015   jj  = aij->j;
2016   aa  = aij->a;
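  /* insert each row of the sequential submatrix into the new parallel matrix,
     addressing it by its global row number */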
2017   for (i=0; i<m; i++) {
2018     row   = rstart + i;
2019     nz    = ii[i+1] - ii[i];
2020     cwork = jj;     jj += nz;
2021     vwork = aa;     aa += nz;
2022     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2023   }
2024 
2025   ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
2026   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2027   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2028   *newmat = M;
2029   PetscFunctionReturn(0);
2030 }
2031