xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision db81eaa069e82fd94d5ad0d278d67f90302bbbae)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.239 1998/04/15 22:50:53 curfman Exp curfman $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way, since the
14 length of colmap equals the global number of matrix columns.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
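
/*
   Illustrative sketch: if this process's off-diagonal block B references
   global columns {2,7,9}, then garray = {2,7,9} and after
   CreateColmap_MPIAIJ_Private() we have colmap[2] = 1, colmap[7] = 2,
   colmap[9] = 3, and colmap[k] = 0 for every other k.  The local index
   is stored as i+1 so that 0 can serve as a "column not present" marker.
*/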
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
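/*
   The two macros below insert a single (row,col,value) entry directly into
   the diagonal block (A) or the off-diagonal block (B), avoiding the
   overhead of the MatSetValues_SeqAIJ() calls that are left commented out
   in MatSetValues_MPIAIJ() below.  A binary search narrows the range, a
   linear scan locates the exact position, and if the row has no free slot
   the entire nonzero structure is reallocated with CHUNKSIZE extra entries
   for this row (all later row offsets are shifted by CHUNKSIZE).
*/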
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col1) high = t; \
46       else              low  = t; \
47     } \
48     for ( _i=low; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col1) high = t; \
117       else              low  = t; \
118     } \
119     for ( _i=low; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some variables required by the macros */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
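
/*
   Usage sketch (A is any MPIAIJ matrix; the names here are illustrative):
   insert the 2x2 block with global rows {0,1} and global columns {0,2}.
   By default values are row-oriented, so v is laid out one row after
   another; entries owned by other processes are stashed and shipped to
   their owners during MatAssemblyBegin()/MatAssemblyEnd().

     int    rows[2] = {0,1}, cols[2] = {0,2};
     Scalar v[4]    = {1.0, 2.0, 3.0, 4.0};
     ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES); CHKERRQ(ierr);
*/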
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     } else {
294       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
295     }
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNC__
301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
303 {
304   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
305   MPI_Comm    comm = mat->comm;
306   int         size = aij->size, *owners = aij->rowners;
307   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
308   MPI_Request *send_waits,*recv_waits;
309   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
310   InsertMode  addv;
311   Scalar      *rvalues,*svalues;
312 
313   PetscFunctionBegin;
314   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
315   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
316   if (addv == (ADD_VALUES|INSERT_VALUES)) {
317     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
318   }
319   mat->insertmode = addv; /* in case this processor had no cache */
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
325   for ( i=0; i<aij->stash.n; i++ ) {
326     idx = aij->stash.idx[i];
327     for ( j=0; j<size; j++ ) {
328       if (idx >= owners[j] && idx < owners[j+1]) {
329         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
330       }
331     }
332   }
333   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
334 
335   /* inform other processors of number of messages and max length */
336   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
337   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
338   nreceives = work[rank];
339   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
340   nmax = work[rank];
341   PetscFree(work);
342 
343   /* post receives:
344        1) each message will consist of ordered triples
345      (global row index, global column index, value); we store the
346      indices as Scalars (doubles) to simplify the message passing.
347        2) since we don't know how long each individual message is, we
348      allocate the largest needed buffer for each receive.  Potentially
349      this is a lot of wasted space.
350 
351 
352        This could be done better.
353   */
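  /*
     Layout sketch of one buffer entry k (a triple of Scalars):

       svalues[3*k]   = (Scalar) global row index
       svalues[3*k+1] = (Scalar) global column index
       svalues[3*k+2] = the numerical value

     which is exactly how the packing loop below fills the send buffer.
  */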
354   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
355   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
356   for ( i=0; i<nreceives; i++ ) {
357     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
358               comm,recv_waits+i);CHKERRQ(ierr);
359   }
360 
361   /* do sends:
362       1) starts[i] gives the starting index in svalues for stuff going to
363          the ith processor
364   */
365   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
366   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
367   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
368   starts[0]  = 0;
369   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
370   for ( i=0; i<aij->stash.n; i++ ) {
371     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
372     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
373     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
374   }
375   PetscFree(owner);
376   starts[0] = 0;
377   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   count = 0;
379   for ( i=0; i<size; i++ ) {
380     if (procs[i]) {
381       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
382     }
383   }
384   PetscFree(starts); PetscFree(nprocs);
385 
386   /* Free cache space */
387   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
388   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
389 
390   aij->svalues    = svalues;    aij->rvalues    = rvalues;
391   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
392   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
393   aij->rmax       = nmax;
394 
395   PetscFunctionReturn(0);
396 }
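
/*
   Usage sketch: assembly is split into a begin/end pair so that the
   communication can be overlapped with other work; every process must
   call both:

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   which ships the stashed off-process entries to their owning processes.
*/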
397 extern int MatSetUpMultiply_MPIAIJ(Mat);
398 
399 #undef __FUNC__
400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
402 {
403   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
404   MPI_Status  *send_status,recv_status;
405   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
406   int         row,col,other_disassembled;
407   Scalar      *values,val;
408   InsertMode  addv = mat->insertmode;
409 
410   PetscFunctionBegin;
411   /*  wait on receives */
412   while (count) {
413     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
414     /* unpack receives into our local space */
415     values = aij->rvalues + 3*imdex*aij->rmax;
416     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
417     n = n/3;
418     for ( i=0; i<n; i++ ) {
419       row = (int) PetscReal(values[3*i]) - aij->rstart;
420       col = (int) PetscReal(values[3*i+1]);
421       val = values[3*i+2];
422       if (col >= aij->cstart && col < aij->cend) {
423         col -= aij->cstart;
424         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
425       } else {
426         if (mat->was_assembled || mat->assembled) {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
429           }
430           col = aij->colmap[col] - 1;
431           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
432             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
433             col = (int) PetscReal(values[3*i+1]);
434           }
435         }
436         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
437       }
438     }
439     count--;
440   }
441   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
442 
443   /* wait on sends */
444   if (aij->nsends) {
445     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
446     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
447     PetscFree(send_status);
448   }
449   PetscFree(aij->send_waits); PetscFree(aij->svalues);
450 
451   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
452   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
453 
454   /* determine if any processor has disassembled, if so we must
455      also disassemble ourselves, so that we may reassemble. */
456   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
457   if (mat->was_assembled && !other_disassembled) {
458     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
459   }
460 
461   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
462     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
463   }
464   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
465   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
466 
467   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatZeroEntries_MPIAIJ"
473 int MatZeroEntries_MPIAIJ(Mat A)
474 {
475   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
476   int        ierr;
477 
478   PetscFunctionBegin;
479   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
480   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNC__
485 #define __FUNC__ "MatZeroRows_MPIAIJ"
486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
487 {
488   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
489   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
490   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
491   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
492   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
493   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
494   MPI_Comm       comm = A->comm;
495   MPI_Request    *send_waits,*recv_waits;
496   MPI_Status     recv_status,*send_status;
497   IS             istmp;
498 
499   PetscFunctionBegin;
500   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
501   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
502 
503   /*  first count number of contributors to each processor */
504   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
505   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
506   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
507   for ( i=0; i<N; i++ ) {
508     idx = rows[i];
509     found = 0;
510     for ( j=0; j<size; j++ ) {
511       if (idx >= owners[j] && idx < owners[j+1]) {
512         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
513       }
514     }
515     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
516   }
517   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
518 
519   /* inform other processors of number of messages and max length*/
520   /* inform other processors of number of messages and max length */
521   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
522   nrecvs = work[rank];
523   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
524   nmax = work[rank];
525   PetscFree(work);
526 
527   /* post receives:   */
528   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
529   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
530   for ( i=0; i<nrecvs; i++ ) {
531     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
532   }
533 
534   /* do sends:
535       1) starts[i] gives the starting index in svalues for stuff going to
536          the ith processor
537   */
538   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
539   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
540   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
541   starts[0] = 0;
542   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
543   for ( i=0; i<N; i++ ) {
544     svalues[starts[owner[i]]++] = rows[i];
545   }
546   ISRestoreIndices(is,&rows);
547 
548   starts[0] = 0;
549   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
550   count = 0;
551   for ( i=0; i<size; i++ ) {
552     if (procs[i]) {
553       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
554     }
555   }
556   PetscFree(starts);
557 
558   base = owners[rank];
559 
560   /*  wait on receives */
561   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
562   source = lens + nrecvs;
563   count  = nrecvs; slen = 0;
564   while (count) {
565     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
566     /* unpack receives into our local space */
567     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
568     source[imdex]  = recv_status.MPI_SOURCE;
569     lens[imdex]  = n;
570     slen += n;
571     count--;
572   }
573   PetscFree(recv_waits);
574 
575   /* move the data into the send scatter */
576   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
577   count = 0;
578   for ( i=0; i<nrecvs; i++ ) {
579     values = rvalues + i*nmax;
580     for ( j=0; j<lens[i]; j++ ) {
581       lrows[count++] = values[j] - base;
582     }
583   }
584   PetscFree(rvalues); PetscFree(lens);
585   PetscFree(owner); PetscFree(nprocs);
586 
587   /* actually zap the local rows */
588   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
589   PLogObjectParent(A,istmp);
590 
591   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
592   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
593   ierr = ISDestroy(istmp); CHKERRQ(ierr);
594 
595   if (diag) {
596     for ( i = 0; i < slen; i++ ) {
597       row = lrows[i] + rstart;
598       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
599     }
600     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
601     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
602   }
603   PetscFree(lrows);
604   /* wait on sends */
605   if (nsends) {
606     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
607     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
608     PetscFree(send_status);
609   }
610   PetscFree(send_waits); PetscFree(svalues);
611 
612   PetscFunctionReturn(0);
613 }
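
/*
   Usage sketch (illustrative names): zero global rows 0 and 5 and place
   1.0 on their diagonals; rows owned by other processes are forwarded to
   their owners, so each process may pass any subset of global rows.

     IS     is;
     int    rows[2] = {0,5};
     Scalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/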
614 
615 #undef __FUNC__
616 #define __FUNC__ "MatMult_MPIAIJ"
617 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
618 {
619   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
620   int        ierr,nt;
621 
622   PetscFunctionBegin;
623   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
624   if (nt != a->n) {
625     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
626   }
627   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
628   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
629   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
630   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
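
/*
   The multiply above computes yy = A*xx + B*lvec, where lvec holds the
   ghost (off-process) entries of xx gathered by the Mvctx scatter; the
   scatter is overlapped with the local diagonal-block multiply.
*/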
633 
634 #undef __FUNC__
635 #define __FUNC__ "MatMultAdd_MPIAIJ"
636 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
643   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
644   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
645   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNC__
650 #define __FUNC__ "MatMultTrans_MPIAIJ"
651 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
652 {
653   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
654   int        ierr;
655 
656   PetscFunctionBegin;
657   /* do nondiagonal part */
658   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
659   /* send it on its way */
660   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
661   /* do local part */
662   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
663   /* receive remote parts: note this assumes the values are not actually */
664   /* inserted into yy until the next line, which is true for this */
665   /* implementation but may not hold in general. */
666   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNC__
671 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
672 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
673 {
674   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
675   int        ierr;
676 
677   PetscFunctionBegin;
678   /* do nondiagonal part */
679   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
680   /* send it on its way */
681   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682   /* do local part */
683   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
684   /* receive remote parts: note this assumes the values are not actually */
685   /* inserted into zz until the next line, which is true for this */
686   /* implementation but may not hold in general. */
687   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 /*
692   This only works correctly for square matrices where the subblock A->A is the
693    diagonal block
694 */
695 #undef __FUNC__
696 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
697 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
698 {
699   int        ierr;
700   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
701 
702   PetscFunctionBegin;
703   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
704   if (a->rstart != a->cstart || a->rend != a->cend) {
705     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
706   }
707   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNC__
712 #define __FUNC__ "MatScale_MPIAIJ"
713 int MatScale_MPIAIJ(Scalar *aa,Mat A)
714 {
715   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
716   int        ierr;
717 
718   PetscFunctionBegin;
719   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
720   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNC__
725 #define __FUNC__ "MatDestroy_MPIAIJ"
726 int MatDestroy_MPIAIJ(Mat mat)
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
729   int        ierr;
730 
731   PetscFunctionBegin;
732 #if defined(USE_PETSC_LOG)
733   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
734 #endif
735   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
736   PetscFree(aij->rowners);
737   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
738   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
739   if (aij->colmap) PetscFree(aij->colmap);
740   if (aij->garray) PetscFree(aij->garray);
741   if (aij->lvec)   VecDestroy(aij->lvec);
742   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
743   if (aij->rowvalues) PetscFree(aij->rowvalues);
744   PetscFree(aij);
745   PLogObjectDestroy(mat);
746   PetscHeaderDestroy(mat);
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNC__
751 #define __FUNC__ "MatView_MPIAIJ_Binary"
752 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
753 {
754   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
755   int         ierr;
756 
757   PetscFunctionBegin;
758   if (aij->size == 1) {
759     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
760   }
761   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNC__
766 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
767 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
768 {
769   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
770   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
771   int         ierr, format,shift = C->indexshift,rank;
772   FILE        *fd;
773   ViewerType  vtype;
774 
775   PetscFunctionBegin;
776   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
777   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
778     ierr = ViewerGetFormat(viewer,&format);
779     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
780       MatInfo info;
781       int     flg;
782       MPI_Comm_rank(mat->comm,&rank);
783       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
784       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
785       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
786       PetscSequentialPhaseBegin(mat->comm,1);
787       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
788          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
789       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
790          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
791       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
792       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
793       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
794       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
795       fflush(fd);
796       PetscSequentialPhaseEnd(mat->comm,1);
797       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
798       PetscFunctionReturn(0);
799     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
800       PetscFunctionReturn(0);
801     }
802   }
803 
804   if (vtype == DRAW_VIEWER) {
805     Draw       draw;
806     PetscTruth isnull;
807     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
808     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
809   }
810 
811   if (vtype == ASCII_FILE_VIEWER) {
812     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
813     PetscSequentialPhaseBegin(mat->comm,1);
814     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
815            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
816            aij->cend);
817     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
818     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
819     fflush(fd);
820     PetscSequentialPhaseEnd(mat->comm,1);
821   } else {
822     int size = aij->size;
823     rank = aij->rank;
824     if (size == 1) {
825       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
826     } else {
827       /* assemble the entire matrix onto first processor. */
828       Mat         A;
829       Mat_SeqAIJ *Aloc;
830       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
831       Scalar      *a;
832 
833       if (!rank) {
834         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
835       } else {
836         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
837       }
838       PLogObjectParent(mat,A);
839 
840       /* copy over the A part */
841       Aloc = (Mat_SeqAIJ*) aij->A->data;
842       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
843       row = aij->rstart;
844       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
845       for ( i=0; i<m; i++ ) {
846         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
847         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
848       }
849       aj = Aloc->j;
850       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
851 
852       /* copy over the B part */
853       Aloc = (Mat_SeqAIJ*) aij->B->data;
854       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
855       row = aij->rstart;
856       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
857       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
858       for ( i=0; i<m; i++ ) {
859         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
860         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
861       }
862       PetscFree(ct);
863       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
864       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
865       /*
866          Everyone has to call to draw the matrix since the graphics waits are
867          synchronized across all processors that share the Draw object
868       */
869       if (!rank || vtype == DRAW_VIEWER) {
870         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
871       }
872       ierr = MatDestroy(A); CHKERRQ(ierr);
873     }
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNC__
879 #define __FUNC__ "MatView_MPIAIJ"
880 int MatView_MPIAIJ(Mat mat,Viewer viewer)
881 {
882   int         ierr;
883   ViewerType  vtype;
884 
885   PetscFunctionBegin;
886   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
887   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
888       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
889     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
890   } else if (vtype == BINARY_FILE_VIEWER) {
891     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
892   } else {
893     SETERRQ(1,1,"Viewer type not supported by PETSc object");
894   }
895   PetscFunctionReturn(0);
896 }
897 
898 /*
899     This has to provide several versions:
900 
901      2) a) use only local smoothing, updating outer values only once;
902         b) local smoothing, updating outer values each inner iteration;
903      3) coloring, updating outer values between colors.
904 */
905 #undef __FUNC__
906 #define __FUNC__ "MatRelax_MPIAIJ"
907 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
908                            double fshift,int its,Vec xx)
909 {
910   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
911   Mat        AA = mat->A, BB = mat->B;
912   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
913   Scalar     *b,*x,*xs,*ls,d,*v,sum;
914   int        ierr,*idx, *diag;
915   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
916 
917   PetscFunctionBegin;
918   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
919   xs = x + shift; /* shift by one for index start of 1 */
920   ls = ls + shift;
921   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
922   diag = A->diag;
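
  /*
     Each sweep below applies the standard SOR update (assuming the
     SPARSEDENSEMDOT() macro from src/inline/spops.h subtracts the sparse
     dot product from sum):

       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j != i} a_ij*x_j)/(a_ii + fshift)

     with the off-diagonal-block contribution taken from the ghost values
     in ls (mat->lvec).
  */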
923   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
924     if (flag & SOR_ZERO_INITIAL_GUESS) {
925       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
926       PetscFunctionReturn(0);
927     }
928     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
929     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
930     while (its--) {
931       /* go down through the rows */
932       for ( i=0; i<m; i++ ) {
933         n    = A->i[i+1] - A->i[i];
934         idx  = A->j + A->i[i] + shift;
935         v    = A->a + A->i[i] + shift;
936         sum  = b[i];
937         SPARSEDENSEMDOT(sum,xs,v,idx,n);
938         d    = fshift + A->a[diag[i]+shift];
939         n    = B->i[i+1] - B->i[i];
940         idx  = B->j + B->i[i] + shift;
941         v    = B->a + B->i[i] + shift;
942         SPARSEDENSEMDOT(sum,ls,v,idx,n);
943         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
944       }
945       /* come up through the rows */
946       for ( i=m-1; i>-1; i-- ) {
947         n    = A->i[i+1] - A->i[i];
948         idx  = A->j + A->i[i] + shift;
949         v    = A->a + A->i[i] + shift;
950         sum  = b[i];
951         SPARSEDENSEMDOT(sum,xs,v,idx,n);
952         d    = fshift + A->a[diag[i]+shift];
953         n    = B->i[i+1] - B->i[i];
954         idx  = B->j + B->i[i] + shift;
955         v    = B->a + B->i[i] + shift;
956         SPARSEDENSEMDOT(sum,ls,v,idx,n);
957         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
958       }
959     }
960   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
961     if (flag & SOR_ZERO_INITIAL_GUESS) {
962       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
963       PetscFunctionReturn(0);
964     }
965     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
966     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
967     while (its--) {
968       for ( i=0; i<m; i++ ) {
969         n    = A->i[i+1] - A->i[i];
970         idx  = A->j + A->i[i] + shift;
971         v    = A->a + A->i[i] + shift;
972         sum  = b[i];
973         SPARSEDENSEMDOT(sum,xs,v,idx,n);
974         d    = fshift + A->a[diag[i]+shift];
975         n    = B->i[i+1] - B->i[i];
976         idx  = B->j + B->i[i] + shift;
977         v    = B->a + B->i[i] + shift;
978         SPARSEDENSEMDOT(sum,ls,v,idx,n);
979         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
980       }
981     }
982   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
983     if (flag & SOR_ZERO_INITIAL_GUESS) {
984       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     }
987     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
988                             mat->Mvctx); CHKERRQ(ierr);
989     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
990                             mat->Mvctx); CHKERRQ(ierr);
991     while (its--) {
992       for ( i=m-1; i>-1; i-- ) {
993         n    = A->i[i+1] - A->i[i];
994         idx  = A->j + A->i[i] + shift;
995         v    = A->a + A->i[i] + shift;
996         sum  = b[i];
997         SPARSEDENSEMDOT(sum,xs,v,idx,n);
998         d    = fshift + A->a[diag[i]+shift];
999         n    = B->i[i+1] - B->i[i];
1000         idx  = B->j + B->i[i] + shift;
1001         v    = B->a + B->i[i] + shift;
1002         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1003         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1004       }
1005     }
1006   } else {
1007     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNC__
1013 #define __FUNC__ "MatGetInfo_MPIAIJ"
1014 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1015 {
1016   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1017   Mat        A = mat->A, B = mat->B;
1018   int        ierr;
1019   double     isend[5], irecv[5];
1020 
1021   PetscFunctionBegin;
1022   info->block_size     = 1.0;
1023   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1024   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1025   isend[3] = info->memory;  isend[4] = info->mallocs;
1026   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1027   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1028   isend[3] += info->memory;  isend[4] += info->mallocs;
1029   if (flag == MAT_LOCAL) {
1030     info->nz_used      = isend[0];
1031     info->nz_allocated = isend[1];
1032     info->nz_unneeded  = isend[2];
1033     info->memory       = isend[3];
1034     info->mallocs      = isend[4];
1035   } else if (flag == MAT_GLOBAL_MAX) {
1036     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1037     info->nz_used      = irecv[0];
1038     info->nz_allocated = irecv[1];
1039     info->nz_unneeded  = irecv[2];
1040     info->memory       = irecv[3];
1041     info->mallocs      = irecv[4];
1042   } else if (flag == MAT_GLOBAL_SUM) {
1043     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1044     info->nz_used      = irecv[0];
1045     info->nz_allocated = irecv[1];
1046     info->nz_unneeded  = irecv[2];
1047     info->memory       = irecv[3];
1048     info->mallocs      = irecv[4];
1049   }
1050   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1051   info->fill_ratio_needed = 0;
1052   info->factor_mallocs    = 0;
1053   info->rows_global       = (double)mat->M;
1054   info->columns_global    = (double)mat->N;
1055   info->rows_local        = (double)mat->m;
1056   info->columns_local     = (double)mat->n;
1057 
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNC__
1062 #define __FUNC__ "MatSetOption_MPIAIJ"
1063 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1064 {
1065   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1066 
1067   PetscFunctionBegin;
1068   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1069       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1070       op == MAT_COLUMNS_UNSORTED ||
1071       op == MAT_COLUMNS_SORTED ||
1072       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1073       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1074         MatSetOption(a->A,op);
1075         MatSetOption(a->B,op);
1076   } else if (op == MAT_ROW_ORIENTED) {
1077     a->roworiented = 1;
1078     MatSetOption(a->A,op);
1079     MatSetOption(a->B,op);
1080   } else if (op == MAT_ROWS_SORTED ||
1081              op == MAT_ROWS_UNSORTED ||
1082              op == MAT_SYMMETRIC ||
1083              op == MAT_STRUCTURALLY_SYMMETRIC ||
1084              op == MAT_YES_NEW_DIAGONALS)
1085     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1086   else if (op == MAT_COLUMN_ORIENTED) {
1087     a->roworiented = 0;
1088     MatSetOption(a->A,op);
1089     MatSetOption(a->B,op);
1090   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1091     a->donotstash = 1;
1092   } else if (op == MAT_NO_NEW_DIAGONALS){
1093     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1094   } else {
1095     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1096   }
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNC__
1101 #define __FUNC__ "MatGetSize_MPIAIJ"
1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1103 {
1104   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1105 
1106   PetscFunctionBegin;
1107   if (m) *m = mat->M;
1108   if (n) *n = mat->N;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNC__
1113 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1114 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1115 {
1116   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1117 
1118   PetscFunctionBegin;
1119   if (m) *m = mat->m;
1120   if (n) *n = mat->n;
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNC__
1125 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1127 {
1128   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1129 
1130   PetscFunctionBegin;
1131   *m = mat->rstart; *n = mat->rend;
1132   PetscFunctionReturn(0);
1133 }
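
/*
   Usage sketch: iterate over the locally owned rows of a parallel matrix:

     int rstart,rend,row;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ...
     }
*/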
1134 
1135 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1136 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1137 
1138 #undef __FUNC__
1139 #define __FUNC__ "MatGetRow_MPIAIJ"
1140 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1141 {
1142   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1143   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1144   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1145   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1146   int        *cmap, *idx_p;
1147 
1148   PetscFunctionBegin;
1149   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1150   mat->getrowactive = PETSC_TRUE;
1151 
1152   if (!mat->rowvalues && (idx || v)) {
1153     /*
1154         allocate enough space to hold information from the longest row.
1155     */
1156     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1157     int     max = 1,m = mat->m,tmp;
1158     for ( i=0; i<m; i++ ) {
1159       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1160       if (max < tmp) { max = tmp; }
1161     }
1162     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1163     CHKPTRQ(mat->rowvalues);
1164     mat->rowindices = (int *) (mat->rowvalues + max);
1165   }
1166 
1167   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1168   lrow = row - rstart;
1169 
1170   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1171   if (!v)   {pvA = 0; pvB = 0;}
1172   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1173   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1174   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1175   nztot = nzA + nzB;
1176 
1177   cmap  = mat->garray;
1178   if (v  || idx) {
1179     if (nztot) {
1180       /* Sort by increasing column numbers, assuming A and B already sorted */
1181       int imark = -1;
1182       if (v) {
1183         *v = v_p = mat->rowvalues;
1184         for ( i=0; i<nzB; i++ ) {
1185           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1186           else break;
1187         }
1188         imark = i;
1189         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1190         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1191       }
1192       if (idx) {
1193         *idx = idx_p = mat->rowindices;
1194         if (imark > -1) {
1195           for ( i=0; i<imark; i++ ) {
1196             idx_p[i] = cmap[cworkB[i]];
1197           }
1198         } else {
1199           for ( i=0; i<nzB; i++ ) {
1200             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1201             else break;
1202           }
1203           imark = i;
1204         }
1205         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1206         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1207       }
1208     }
1209     else {
1210       if (idx) *idx = 0;
1211       if (v)   *v   = 0;
1212     }
1213   }
1214   *nz = nztot;
1215   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1216   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
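
/*
   Usage sketch: examine a locally owned row (column indices are returned
   in increasing global order) and restore it when done:

     int    nz,*cols;
     Scalar *vals;
     ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
     ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
*/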
1219 
1220 #undef __FUNC__
1221 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1222 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1223 {
1224   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1225 
1226   PetscFunctionBegin;
1227   if (aij->getrowactive == PETSC_FALSE) {
1228     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1229   }
1230   aij->getrowactive = PETSC_FALSE;
1231   PetscFunctionReturn(0);
1232 }
1233 
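/*
   Computes the Frobenius norm (square root of the sum of squares of all
   entries), the 1-norm (maximum column sum of absolute values), or the
   infinity norm (maximum row sum of absolute values), each via a local
   computation followed by a single MPI_Allreduce().
*/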
1234 #undef __FUNC__
1235 #define __FUNC__ "MatNorm_MPIAIJ"
1236 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1237 {
1238   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1239   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1240   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1241   double     sum = 0.0;
1242   Scalar     *v;
1243 
1244   PetscFunctionBegin;
1245   if (aij->size == 1) {
1246     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1247   } else {
1248     if (type == NORM_FROBENIUS) {
1249       v = amat->a;
1250       for (i=0; i<amat->nz; i++ ) {
1251 #if defined(USE_PETSC_COMPLEX)
1252         sum += real(conj(*v)*(*v)); v++;
1253 #else
1254         sum += (*v)*(*v); v++;
1255 #endif
1256       }
1257       v = bmat->a;
1258       for (i=0; i<bmat->nz; i++ ) {
1259 #if defined(USE_PETSC_COMPLEX)
1260         sum += real(conj(*v)*(*v)); v++;
1261 #else
1262         sum += (*v)*(*v); v++;
1263 #endif
1264       }
1265       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1266       *norm = sqrt(*norm);
1267     } else if (type == NORM_1) { /* max column norm */
1268       double *tmp, *tmp2;
1269       int    *jj, *garray = aij->garray;
1270       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1271       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1272       PetscMemzero(tmp,aij->N*sizeof(double));
1273       *norm = 0.0;
1274       v = amat->a; jj = amat->j;
1275       for ( j=0; j<amat->nz; j++ ) {
1276         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1277       }
1278       v = bmat->a; jj = bmat->j;
1279       for ( j=0; j<bmat->nz; j++ ) {
1280         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1281       }
1282       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1283       for ( j=0; j<aij->N; j++ ) {
1284         if (tmp2[j] > *norm) *norm = tmp2[j];
1285       }
1286       PetscFree(tmp); PetscFree(tmp2);
1287     } else if (type == NORM_INFINITY) { /* max row norm */
1288       double ntemp = 0.0;
1289       for ( j=0; j<amat->m; j++ ) {
1290         v = amat->a + amat->i[j] + shift;
1291         sum = 0.0;
1292         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1293           sum += PetscAbsScalar(*v); v++;
1294         }
1295         v = bmat->a + bmat->i[j] + shift;
1296         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1297           sum += PetscAbsScalar(*v); v++;
1298         }
1299         if (sum > ntemp) ntemp = sum;
1300       }
1301       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1302     } else {
1303       SETERRQ(PETSC_ERR_SUP,0,"No support for the two norm");
1304     }
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNC__
1310 #define __FUNC__ "MatTranspose_MPIAIJ"
1311 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1312 {
1313   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1314   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1315   int        ierr,shift = Aloc->indexshift;
1316   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1317   Mat        B;
1318   Scalar     *array;
1319 
1320   PetscFunctionBegin;
1321   if (matout == PETSC_NULL && M != N) {
1322     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1323   }
1324 
1325   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1326 
1327   /* copy over the A part */
1328   Aloc = (Mat_SeqAIJ*) a->A->data;
1329   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1330   row = a->rstart;
1331   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1332   for ( i=0; i<m; i++ ) {
1333     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1334     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1335   }
1336   aj = Aloc->j;
1337   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1338 
1339   /* copy over the B part */
1340   Aloc = (Mat_SeqAIJ*) a->B->data;
1341   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1342   row = a->rstart;
1343   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1344   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1345   for ( i=0; i<m; i++ ) {
1346     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1347     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1348   }
1349   PetscFree(ct);
1350   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1351   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1352   if (matout != PETSC_NULL) {
1353     *matout = B;
1354   } else {
1355     PetscOps       *Abops;
1356     struct _MatOps *Aops;
1357 
1358     /* This isn't really an in-place transpose, but we do free the data structures from a */
1359     PetscFree(a->rowners);
1360     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1361     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1362     if (a->colmap) PetscFree(a->colmap);
1363     if (a->garray) PetscFree(a->garray);
1364     if (a->lvec) VecDestroy(a->lvec);
1365     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1366     PetscFree(a);
1367 
1368     /*
1369        This is horrible, horrible code. We need to keep the
1370       A pointers for the bops and ops but copy everything
1371       else from B.
1372     */
1373     Abops = A->bops;
1374     Aops  = A->ops;
1375     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1376     A->bops = Abops;
1377     A->ops  = Aops;
1378     PetscHeaderDestroy(B);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
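/*
   Computes mat = diag(ll)*mat*diag(rr).  The left scaling applies directly
   to both local blocks; for the right scaling of the off-diagonal block
   the relevant entries of rr are first gathered into aij->lvec, with the
   scatter overlapped against the scaling of the diagonal block.
*/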
1383 #undef __FUNC__
1384 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1385 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1386 {
1387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1388   Mat a = aij->A, b = aij->B;
1389   int ierr,s1,s2,s3;
1390 
1391   PetscFunctionBegin;
1392   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1393   if (rr) {
1394     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1395     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1396     /* Overlap communication with computation. */
1397     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1398   }
1399   if (ll) {
1400     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1401     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1402     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1403   }
1404   /* scale  the diagonal block */
1405   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1406 
1407   if (rr) {
1408     /* Do a scatter end and then right scale the off-diagonal block */
1409     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1410     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1411   }
1412 
1413   PetscFunctionReturn(0);
1414 }

extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
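  /* the matrices are equal only if every process found both of its local
     blocks equal, hence the logical AND reduction below */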
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
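/* Table of operations for the MPIAIJ format; a 0 entry means the
   corresponding operation is not implemented for this matrix type. */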
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is
       given).  This value should be the same as the local size used in
       creating the y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is
       given).  This value should be the same as the local size used in
       creating the x vector for the matrix-vector product y = Ax.  For
       square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
             0 1 2 3 4 5 6 7 8 9 10 11
            --------------------------
     row 3  |  o o o d d d o o o o o o
     row 4  |  o o o d d d o o o o o o
     row 5  |  o o o d d d o o o o o o
            --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

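   Example (illustrative values only):
   To create an M-by-N matrix spread across all processes, letting PETSc
   choose the local sizes, while preallocating roughly 5 nonzeros per row
   in the diagonal portion and 2 per row in the off-diagonal portion, one
   might write
.vb
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                            5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
.ve
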
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
  B->ops->destroy    = MatDestroy_MPIAIJ;
  B->ops->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
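  /* rowners[i] holds the global index of the first row owned by process i,
     with rowners[size] = M: allgather the local sizes, then form a running
     sum (e.g. local sizes {3,3,2} give {0,3,6,8}); cowners is the analogous
     table for the columns */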
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache (stash) for off-processor entries set during assembly */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy    = MatDestroy_MPIAIJ;
  mat->ops->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
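  /*
     The binary file is assumed (from the reads below) to be laid out as
         int    header[4]       MAT_COOKIE, M, N, and a format flag
                                (header[3] < 0 flags a special format)
         int    rowlengths[M]   number of nonzeros in each row
         int    cols[nz]        column indices, all rows concatenated
         Scalar vals[nz]        numerical values, all rows concatenated
     Only process 0 reads the file; it ships each piece to its owner.
  */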
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

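  /*
     ourlens[] currently holds the total length of each local row; subtract
     the off-diagonal counts so it temporarily holds the diagonal-block
     lengths needed for preallocation, then restore it immediately after.
  */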
  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  in local, then the end result obtained by concatenating the local
  matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
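    /* e.g. nlocal = {3,2,4} on ranks 0,1,2 gives rend = {3,5,9} below,
       hence rstart = {0,3,5}: each process owns columns [rstart,rend) */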
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ()
       below directly, rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

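/*
   Illustrative call sequence (a sketch, not part of the original source):
   extract a parallel submatrix once, then refill the same submatrix with
   updated values from the parent matrix on a later pass.

      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
      ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);

   The reuse path works because the sequential submatrix is stashed on sub
   under the name "SubMatrix" by the PetscObjectCompose() call above.
*/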