xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision eadb2fb48eb3181ff06464bbc092555ed2ce5415)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.242 1998/05/06 15:30:30 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global number of columns. A usage sketch follows the routine below.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
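
/*
   Illustrative sketch (added note, not part of the original source): how
   garray and colmap relate.  Suppose the off-diagonal block B has three local
   columns whose global column numbers are

       garray[] = {3, 7, 12};

   After CreateColmap_MPIAIJ_Private() runs we have

       colmap[3] = 1,   colmap[7] = 2,   colmap[12] = 3,

   and every other entry of colmap is 0.  A global column gcol is translated
   to a local column of B with

       lcol = aij->colmap[gcol] - 1;

   where a result of -1 means gcol is not (yet) present in B.  This is exactly
   the test used by MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
*/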
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col1) high = t; \
46       else             low  = t; \
47     } \
48       for ( _i=low; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col1) high = t; \
117       else             low  = t; \
118     } \
119       for ( _i=low; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
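
/*
   Added note (not part of the original source): the two macros above insert a
   single (row,col,value) triple into the local SeqAIJ storage of the diagonal
   block (a) or the off-diagonal block (b).  The steps are: locate the position
   within the row (a binary search narrows the range, a linear scan finishes);
   overwrite or add if the entry already exists; otherwise, if the row is full
   (nrow >= rmax), reallocate the entire i/j/a storage with CHUNKSIZE extra
   slots for this row, shift the tail of the row up, and insert.  They appear
   to exist purely to avoid the call overhead of MatSetValues_SeqAIJ() in the
   inner loop of MatSetValues_MPIAIJ() below.
*/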
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some variables required by the macros above */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
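
/*
   Illustrative usage sketch (added note, not part of the original source):
   the routine above is what ultimately runs when a caller does something like

       Mat    mat;
       int    row = ..., col = ..., ierr;
       Scalar value = 1.0;

       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,5,PETSC_NULL,
                              2,PETSC_NULL,&mat); CHKERRQ(ierr);
       ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   Locally owned (row,col) pairs go straight into the diagonal block aij->A or
   the off-diagonal block aij->B; rows owned by another processor are stashed
   and shipped during MatAssemblyBegin/End (see below).  The sizes m,n,M,N and
   the preallocation counts are placeholders, not values from this file.
*/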
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     } else {
294       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
295     }
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNC__
301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
303 {
304   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
305   MPI_Comm    comm = mat->comm;
306   int         size = aij->size, *owners = aij->rowners;
307   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
308   MPI_Request *send_waits,*recv_waits;
309   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
310   InsertMode  addv;
311   Scalar      *rvalues,*svalues;
312 
313   PetscFunctionBegin;
314   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
315   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
316   if (addv == (ADD_VALUES|INSERT_VALUES)) {
317     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
318   }
319   mat->insertmode = addv; /* in case this processor had no cache */
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
325   for ( i=0; i<aij->stash.n; i++ ) {
326     idx = aij->stash.idx[i];
327     for ( j=0; j<size; j++ ) {
328       if (idx >= owners[j] && idx < owners[j+1]) {
329         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
330       }
331     }
332   }
333   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
334 
335   /* inform other processors of number of messages and max length */
336   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
337   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
338   nreceives = work[rank];
339   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
340   nmax = work[rank];
341   PetscFree(work);
342 
343   /* post receives:
344        1) each message will consist of ordered triples
345      (global row index, global column index, value); we store the indices
346      as Scalars (doubles) to simplify the message passing.
347        2) since we don't know how long each individual message is we
348      allocate the largest needed buffer for each receive. Potentially
349      this is a lot of wasted space.
350 
351 
352        This could be done better.
353   */
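  /*
     Illustrative layout (added note, not part of the original source): each
     stashed entry occupies three consecutive Scalars in svalues/rvalues,

         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] = numerical value

     so a message carrying nprocs[i] entries is 3*nprocs[i] Scalars long, and
     MatAssemblyEnd_MPIAIJ() recovers the indices by casting back to int.
  */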
354   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
355   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
356   for ( i=0; i<nreceives; i++ ) {
357     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
358               comm,recv_waits+i);CHKERRQ(ierr);
359   }
360 
361   /* do sends:
362       1) starts[i] gives the starting index in svalues for stuff going to
363          the ith processor
364   */
365   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
366   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
367   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
368   starts[0]  = 0;
369   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
370   for ( i=0; i<aij->stash.n; i++ ) {
371     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
372     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
373     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
374   }
375   PetscFree(owner);
376   starts[0] = 0;
377   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   count = 0;
379   for ( i=0; i<size; i++ ) {
380     if (procs[i]) {
381       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
382     }
383   }
384   PetscFree(starts); PetscFree(nprocs);
385 
386   /* Free cache space */
387   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
388   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
389 
390   aij->svalues    = svalues;    aij->rvalues    = rvalues;
391   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
392   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
393   aij->rmax       = nmax;
394 
395   PetscFunctionReturn(0);
396 }
397 extern int MatSetUpMultiply_MPIAIJ(Mat);
398 
399 #undef __FUNC__
400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
402 {
403   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
404   MPI_Status  *send_status,recv_status;
405   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
406   int         row,col,other_disassembled;
407   Scalar      *values,val;
408   InsertMode  addv = mat->insertmode;
409 
410   PetscFunctionBegin;
411   /*  wait on receives */
412   while (count) {
413     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
414     /* unpack receives into our local space */
415     values = aij->rvalues + 3*imdex*aij->rmax;
416     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
417     n = n/3;
418     for ( i=0; i<n; i++ ) {
419       row = (int) PetscReal(values[3*i]) - aij->rstart;
420       col = (int) PetscReal(values[3*i+1]);
421       val = values[3*i+2];
422       if (col >= aij->cstart && col < aij->cend) {
423         col -= aij->cstart;
424         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
425       } else {
426         if (mat->was_assembled || mat->assembled) {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
429           }
430           col = aij->colmap[col] - 1;
431           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
432             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
433             col = (int) PetscReal(values[3*i+1]);
434           }
435         }
436         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
437       }
438     }
439     count--;
440   }
441   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
442 
443   /* wait on sends */
444   if (aij->nsends) {
445     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
446     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
447     PetscFree(send_status);
448   }
449   PetscFree(aij->send_waits); PetscFree(aij->svalues);
450 
451   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
452   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
453 
454   /* determine if any processor has disassembled; if so, we must also
455      disassemble ourselves, in order that we may reassemble. */
456   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
457   if (mat->was_assembled && !other_disassembled) {
458     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
459   }
460 
461   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
462     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
463   }
464   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
465   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
466 
467   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatZeroEntries_MPIAIJ"
473 int MatZeroEntries_MPIAIJ(Mat A)
474 {
475   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
476   int        ierr;
477 
478   PetscFunctionBegin;
479   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
480   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNC__
485 #define __FUNC__ "MatZeroRows_MPIAIJ"
486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
487 {
488   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
489   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
490   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
491   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
492   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
493   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
494   MPI_Comm       comm = A->comm;
495   MPI_Request    *send_waits,*recv_waits;
496   MPI_Status     recv_status,*send_status;
497   IS             istmp;
498   PetscTruth     localdiag;
499 
500   PetscFunctionBegin;
501   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
502   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
503 
504   /*  first count number of contributors to each processor */
505   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
506   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
507   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
508   for ( i=0; i<N; i++ ) {
509     idx = rows[i];
510     found = 0;
511     for ( j=0; j<size; j++ ) {
512       if (idx >= owners[j] && idx < owners[j+1]) {
513         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
514       }
515     }
516     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
517   }
518   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
519 
520   /* inform other processors of number of messages and max length */
521   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
522   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
523   nrecvs = work[rank];
524   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
525   nmax = work[rank];
526   PetscFree(work);
527 
528   /* post receives:   */
529   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
530   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
531   for ( i=0; i<nrecvs; i++ ) {
532     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
533   }
534 
535   /* do sends:
536       1) starts[i] gives the starting index in svalues for stuff going to
537          the ith processor
538   */
539   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
540   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
541   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
542   starts[0] = 0;
543   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
544   for ( i=0; i<N; i++ ) {
545     svalues[starts[owner[i]]++] = rows[i];
546   }
547   ISRestoreIndices(is,&rows);
548 
549   starts[0] = 0;
550   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
551   count = 0;
552   for ( i=0; i<size; i++ ) {
553     if (procs[i]) {
554       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
555     }
556   }
557   PetscFree(starts);
558 
559   base = owners[rank];
560 
561   /*  wait on receives */
562   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
563   source = lens + nrecvs;
564   count  = nrecvs; slen = 0;
565   while (count) {
566     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
567     /* unpack receives into our local space */
568     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
569     source[imdex]  = recv_status.MPI_SOURCE;
570     lens[imdex]  = n;
571     slen += n;
572     count--;
573   }
574   PetscFree(recv_waits);
575 
576   /* move the data into the send scatter */
577   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
578   count = 0;
579   for ( i=0; i<nrecvs; i++ ) {
580     values = rvalues + i*nmax;
581     for ( j=0; j<lens[i]; j++ ) {
582       lrows[count++] = values[j] - base;
583     }
584   }
585   PetscFree(rvalues); PetscFree(lens);
586   PetscFree(owner); PetscFree(nprocs);
587 
588   /* actually zap the local rows */
589   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
590   PLogObjectParent(A,istmp);
591 
592   /*
593         Zero the required rows. If the "diagonal block" of the matrix
594      is square and the user wishes to set the diagonal, we use separate
595      code so that MatSetValues() is not called for each diagonal entry,
596      which could allocate new memory, causing many mallocs and slowing things down.
597 
598        Contributed by: Mathew Knepley
599   */
600   localdiag = PETSC_FALSE;
601   if (diag && (l->A->M == l->A->N)) {
602     localdiag = PETSC_TRUE;
603     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
604   } else {
605     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
606   }
607   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
608   ierr = ISDestroy(istmp); CHKERRQ(ierr);
609 
610   if (diag && (localdiag == PETSC_FALSE)) {
611     for ( i = 0; i < slen; i++ ) {
612       row = lrows[i] + rstart;
613       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
614     }
615     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
616     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
617   }
618 
627   PetscFree(lrows);
628 
629   /* wait on sends */
630   if (nsends) {
631     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
632     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
633     PetscFree(send_status);
634   }
635   PetscFree(send_waits); PetscFree(svalues);
636 
637   PetscFunctionReturn(0);
638 }
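
/*
   Illustrative usage sketch (added note, not part of the original source):
   zero two global rows of an assembled MATMPIAIJ matrix mat (hypothetical)
   and place 1.0 on their diagonal,

       IS     is;
       int    rows[2] = {0, 5}, ierr;    two hypothetical global row numbers
       Scalar one = 1.0;

       ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is); CHKERRQ(ierr);
       ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
       ierr = ISDestroy(is); CHKERRQ(ierr);

   Each processor may list any global rows; the routine above routes every
   index to its owning processor before the local rows are actually zeroed.
*/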
639 
640 #undef __FUNC__
641 #define __FUNC__ "MatMult_MPIAIJ"
642 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
645   int        ierr,nt;
646 
647   PetscFunctionBegin;
648   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
649   if (nt != a->n) {
650     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
651   }
652   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
653   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
654   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
655   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNC__
660 #define __FUNC__ "MatMultAdd_MPIAIJ"
661 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
664   int        ierr;
665 
666   PetscFunctionBegin;
667   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
668   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
669   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
670   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatMultTrans_MPIAIJ"
676 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted into yy until VecScatterEnd() below, which is true for the */
690   /* current VecScatter implementation but may not always hold. */
691   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
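
/*
   Added note (not part of the original source): sketch of the decomposition
   used above.  With the locally owned rows split as A_local = [A_d  A_o],
   where A_d is the diagonal block (a->A) and A_o the off-diagonal block
   (a->B), the transpose product y = A^T x is formed as

       y_owned  += A_d^T x_local      (computed by a->A directly into yy)
       y_remote += A_o^T x_local      (computed by a->B into a->lvec)

   and the a->lvec contributions are added into the owning processors'
   portions of yy by the reverse scatter with ADD_VALUES.
*/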
694 
695 #undef __FUNC__
696 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
697 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
698 {
699   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
700   int        ierr;
701 
702   PetscFunctionBegin;
703   /* do nondiagonal part */
704   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
705   /* send it on its way */
706   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
707   /* do local part */
708   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
709   /* receive remote parts: note this assumes the values are not actually */
710   /* inserted into zz until VecScatterEnd() below, which is true for the */
711   /* current VecScatter implementation but may not always hold. */
712   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 /*
717   This only works correctly for square matrices where the subblock A->A is the
718    diagonal block
719 */
720 #undef __FUNC__
721 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
722 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
723 {
724   int        ierr;
725   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
726 
727   PetscFunctionBegin;
728   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
729   if (a->rstart != a->cstart || a->rend != a->cend) {
730     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
731   }
732   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
733   PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNC__
737 #define __FUNC__ "MatScale_MPIAIJ"
738 int MatScale_MPIAIJ(Scalar *aa,Mat A)
739 {
740   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
741   int        ierr;
742 
743   PetscFunctionBegin;
744   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
745   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
746   PetscFunctionReturn(0);
747 }
748 
749 #undef __FUNC__
750 #define __FUNC__ "MatDestroy_MPIAIJ"
751 int MatDestroy_MPIAIJ(Mat mat)
752 {
753   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
754   int        ierr;
755 
756   PetscFunctionBegin;
757 #if defined(USE_PETSC_LOG)
758   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
759 #endif
760   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
761   PetscFree(aij->rowners);
762   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
763   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
764   if (aij->colmap) PetscFree(aij->colmap);
765   if (aij->garray) PetscFree(aij->garray);
766   if (aij->lvec)   VecDestroy(aij->lvec);
767   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
768   if (aij->rowvalues) PetscFree(aij->rowvalues);
769   PetscFree(aij);
770   PLogObjectDestroy(mat);
771   PetscHeaderDestroy(mat);
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNC__
776 #define __FUNC__ "MatView_MPIAIJ_Binary"
777 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
778 {
779   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
780   int         ierr;
781 
782   PetscFunctionBegin;
783   if (aij->size == 1) {
784     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
785   }
786   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNC__
791 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
792 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
793 {
794   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
795   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
796   int         ierr, format,shift = C->indexshift,rank;
797   FILE        *fd;
798   ViewerType  vtype;
799 
800   PetscFunctionBegin;
801   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
802   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
803     ierr = ViewerGetFormat(viewer,&format);
804     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
805       MatInfo info;
806       int     flg;
807       MPI_Comm_rank(mat->comm,&rank);
808       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
809       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
810       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
811       PetscSequentialPhaseBegin(mat->comm,1);
812       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
813          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
814       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
815          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
816       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
817       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
818       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
819       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
820       fflush(fd);
821       PetscSequentialPhaseEnd(mat->comm,1);
822       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
823       PetscFunctionReturn(0);
824     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
825       PetscFunctionReturn(0);
826     }
827   }
828 
829   if (vtype == DRAW_VIEWER) {
830     Draw       draw;
831     PetscTruth isnull;
832     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
833     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
834   }
835 
836   if (vtype == ASCII_FILE_VIEWER) {
837     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
838     PetscSequentialPhaseBegin(mat->comm,1);
839     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
840            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
841            aij->cend);
842     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
843     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
844     fflush(fd);
845     PetscSequentialPhaseEnd(mat->comm,1);
846   } else {
847     int size = aij->size;
848     rank = aij->rank;
849     if (size == 1) {
850       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
851     } else {
852       /* assemble the entire matrix onto first processor. */
853       Mat         A;
854       Mat_SeqAIJ *Aloc;
855       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
856       Scalar      *a;
857 
858       if (!rank) {
859         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
860       } else {
861         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
862       }
863       PLogObjectParent(mat,A);
864 
865       /* copy over the A part */
866       Aloc = (Mat_SeqAIJ*) aij->A->data;
867       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
868       row = aij->rstart;
869       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
870       for ( i=0; i<m; i++ ) {
871         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
872         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
873       }
874       aj = Aloc->j;
875       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
876 
877       /* copy over the B part */
878       Aloc = (Mat_SeqAIJ*) aij->B->data;
879       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
880       row = aij->rstart;
881       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
882       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
883       for ( i=0; i<m; i++ ) {
884         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
885         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
886       }
887       PetscFree(ct);
888       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
889       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
890       /*
891          Everyone has to call to draw the matrix since the graphics waits are
892          synchronized across all processors that share the Draw object
893       */
894       if (!rank || vtype == DRAW_VIEWER) {
895         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
896       }
897       ierr = MatDestroy(A); CHKERRQ(ierr);
898     }
899   }
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNC__
904 #define __FUNC__ "MatView_MPIAIJ"
905 int MatView_MPIAIJ(Mat mat,Viewer viewer)
906 {
907   int         ierr;
908   ViewerType  vtype;
909 
910   PetscFunctionBegin;
911   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
912   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
913       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
914     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
915   } else if (vtype == BINARY_FILE_VIEWER) {
916     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
917   } else {
918     SETERRQ(1,1,"Viewer type not supported by PETSc object");
919   }
920   PetscFunctionReturn(0);
921 }
922 
923 /*
924     This eventually has to provide several versions:
925 
926      2) a) use only local smoothing, updating outer values only once.
927         b) local smoothing, updating outer values each inner iteration.
928      3) coloring, updating outer values between colors.
929 */
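/*
   Added note (not part of the original source): the local relaxation coded
   below updates, for each locally owned row i,

       x_i <- (1 - omega) x_i + omega * ( b_i - sum_j a_ij x_j
                                          - sum_k b_ik xhat_k + a_ii x_i )
                                        / (a_ii + fshift)

   where a_ij runs over the diagonal block, b_ik over the off-diagonal block,
   and xhat is the ghosted copy of x gathered into mat->lvec.  This reading
   assumes the SPARSEDENSEMDOT() macro from src/inline/spops.h accumulates
   sum -= v[j]*xs[idx[j]]; if it accumulated with a plus sign the signs of the
   two sums above would flip.
*/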
930 #undef __FUNC__
931 #define __FUNC__ "MatRelax_MPIAIJ"
932 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
933                            double fshift,int its,Vec xx)
934 {
935   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
936   Mat        AA = mat->A, BB = mat->B;
937   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
938   Scalar     *b,*x,*xs,*ls,d,*v,sum;
939   int        ierr,*idx, *diag;
940   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
941 
942   PetscFunctionBegin;
943   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
944   xs = x + shift; /* shift by one for index start of 1 */
945   ls = ls + shift;
946   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
947   diag = A->diag;
948   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
949     if (flag & SOR_ZERO_INITIAL_GUESS) {
950       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
951       PetscFunctionReturn(0);
952     }
953     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
954     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
955     while (its--) {
956       /* go down through the rows */
957       for ( i=0; i<m; i++ ) {
958         n    = A->i[i+1] - A->i[i];
959         idx  = A->j + A->i[i] + shift;
960         v    = A->a + A->i[i] + shift;
961         sum  = b[i];
962         SPARSEDENSEMDOT(sum,xs,v,idx,n);
963         d    = fshift + A->a[diag[i]+shift];
964         n    = B->i[i+1] - B->i[i];
965         idx  = B->j + B->i[i] + shift;
966         v    = B->a + B->i[i] + shift;
967         SPARSEDENSEMDOT(sum,ls,v,idx,n);
968         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
969       }
970       /* come up through the rows */
971       for ( i=m-1; i>-1; i-- ) {
972         n    = A->i[i+1] - A->i[i];
973         idx  = A->j + A->i[i] + shift;
974         v    = A->a + A->i[i] + shift;
975         sum  = b[i];
976         SPARSEDENSEMDOT(sum,xs,v,idx,n);
977         d    = fshift + A->a[diag[i]+shift];
978         n    = B->i[i+1] - B->i[i];
979         idx  = B->j + B->i[i] + shift;
980         v    = B->a + B->i[i] + shift;
981         SPARSEDENSEMDOT(sum,ls,v,idx,n);
982         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
983       }
984     }
985   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
986     if (flag & SOR_ZERO_INITIAL_GUESS) {
987       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
988       PetscFunctionReturn(0);
989     }
990     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
991     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
992     while (its--) {
993       for ( i=0; i<m; i++ ) {
994         n    = A->i[i+1] - A->i[i];
995         idx  = A->j + A->i[i] + shift;
996         v    = A->a + A->i[i] + shift;
997         sum  = b[i];
998         SPARSEDENSEMDOT(sum,xs,v,idx,n);
999         d    = fshift + A->a[diag[i]+shift];
1000         n    = B->i[i+1] - B->i[i];
1001         idx  = B->j + B->i[i] + shift;
1002         v    = B->a + B->i[i] + shift;
1003         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1004         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1005       }
1006     }
1007   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1008     if (flag & SOR_ZERO_INITIAL_GUESS) {
1009       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1010       PetscFunctionReturn(0);
1011     }
1012     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1013                             mat->Mvctx); CHKERRQ(ierr);
1014     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1015                             mat->Mvctx); CHKERRQ(ierr);
1016     while (its--) {
1017       for ( i=m-1; i>-1; i-- ) {
1018         n    = A->i[i+1] - A->i[i];
1019         idx  = A->j + A->i[i] + shift;
1020         v    = A->a + A->i[i] + shift;
1021         sum  = b[i];
1022         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1023         d    = fshift + A->a[diag[i]+shift];
1024         n    = B->i[i+1] - B->i[i];
1025         idx  = B->j + B->i[i] + shift;
1026         v    = B->a + B->i[i] + shift;
1027         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1028         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1029       }
1030     }
1031   } else {
1032     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1033   }
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNC__
1038 #define __FUNC__ "MatGetInfo_MPIAIJ"
1039 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1040 {
1041   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1042   Mat        A = mat->A, B = mat->B;
1043   int        ierr;
1044   double     isend[5], irecv[5];
1045 
1046   PetscFunctionBegin;
1047   info->block_size     = 1.0;
1048   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1049   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1050   isend[3] = info->memory;  isend[4] = info->mallocs;
1051   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1052   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1053   isend[3] += info->memory;  isend[4] += info->mallocs;
1054   if (flag == MAT_LOCAL) {
1055     info->nz_used      = isend[0];
1056     info->nz_allocated = isend[1];
1057     info->nz_unneeded  = isend[2];
1058     info->memory       = isend[3];
1059     info->mallocs      = isend[4];
1060   } else if (flag == MAT_GLOBAL_MAX) {
1061     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1062     info->nz_used      = irecv[0];
1063     info->nz_allocated = irecv[1];
1064     info->nz_unneeded  = irecv[2];
1065     info->memory       = irecv[3];
1066     info->mallocs      = irecv[4];
1067   } else if (flag == MAT_GLOBAL_SUM) {
1068     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1069     info->nz_used      = irecv[0];
1070     info->nz_allocated = irecv[1];
1071     info->nz_unneeded  = irecv[2];
1072     info->memory       = irecv[3];
1073     info->mallocs      = irecv[4];
1074   }
1075   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1076   info->fill_ratio_needed = 0;
1077   info->factor_mallocs    = 0;
1078   info->rows_global       = (double)mat->M;
1079   info->columns_global    = (double)mat->N;
1080   info->rows_local        = (double)mat->m;
1081   info->columns_local     = (double)mat->n;
1082 
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNC__
1087 #define __FUNC__ "MatSetOption_MPIAIJ"
1088 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1089 {
1090   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1091 
1092   PetscFunctionBegin;
1093   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1094       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1095       op == MAT_COLUMNS_UNSORTED ||
1096       op == MAT_COLUMNS_SORTED ||
1097       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1098       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1099         MatSetOption(a->A,op);
1100         MatSetOption(a->B,op);
1101   } else if (op == MAT_ROW_ORIENTED) {
1102     a->roworiented = 1;
1103     MatSetOption(a->A,op);
1104     MatSetOption(a->B,op);
1105   } else if (op == MAT_ROWS_SORTED ||
1106              op == MAT_ROWS_UNSORTED ||
1107              op == MAT_SYMMETRIC ||
1108              op == MAT_STRUCTURALLY_SYMMETRIC ||
1109              op == MAT_YES_NEW_DIAGONALS)
1110     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1111   else if (op == MAT_COLUMN_ORIENTED) {
1112     a->roworiented = 0;
1113     MatSetOption(a->A,op);
1114     MatSetOption(a->B,op);
1115   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1116     a->donotstash = 1;
1117   } else if (op == MAT_NO_NEW_DIAGONALS){
1118     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1119   } else {
1120     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1121   }
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNC__
1126 #define __FUNC__ "MatGetSize_MPIAIJ"
1127 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1128 {
1129   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1130 
1131   PetscFunctionBegin;
1132   if (m) *m = mat->M;
1133   if (n) *n = mat->N;
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNC__
1138 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1139 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1140 {
1141   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1142 
1143   PetscFunctionBegin;
1144   if (m) *m = mat->m;
1145   if (n) *n = mat->n;
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNC__
1150 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1151 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1152 {
1153   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1154 
1155   PetscFunctionBegin;
1156   *m = mat->rstart; *n = mat->rend;
1157   PetscFunctionReturn(0);
1158 }
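
/*
   Illustrative usage sketch (added note, not part of the original source):
   the ownership range returned above is the standard way to loop over the
   locally owned rows of a parallel matrix,

       int rstart, rend, i, ierr;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
       for ( i=rstart; i<rend; i++ ) {
         ... set or fetch entries of global row i here ...
       }

   Note that rend is one past the last locally owned row, matching the
   rstart/rend convention used throughout this file.
*/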
1159 
1160 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1161 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1162 
1163 #undef __FUNC__
1164 #define __FUNC__ "MatGetRow_MPIAIJ"
1165 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1166 {
1167   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1168   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1169   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1170   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1171   int        *cmap, *idx_p;
1172 
1173   PetscFunctionBegin;
1174   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1175   mat->getrowactive = PETSC_TRUE;
1176 
1177   if (!mat->rowvalues && (idx || v)) {
1178     /*
1179         allocate enough space to hold information from the longest row.
1180     */
1181     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1182     int     max = 1,m = mat->m,tmp;
1183     for ( i=0; i<m; i++ ) {
1184       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1185       if (max < tmp) { max = tmp; }
1186     }
1187     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1188     CHKPTRQ(mat->rowvalues);
1189     mat->rowindices = (int *) (mat->rowvalues + max);
1190   }
1191 
1192   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1193   lrow = row - rstart;
1194 
1195   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1196   if (!v)   {pvA = 0; pvB = 0;}
1197   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1198   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1199   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1200   nztot = nzA + nzB;
1201 
1202   cmap  = mat->garray;
1203   if (v  || idx) {
1204     if (nztot) {
1205       /* Sort by increasing column numbers, assuming A and B already sorted */
1206       int imark = -1;
1207       if (v) {
1208         *v = v_p = mat->rowvalues;
1209         for ( i=0; i<nzB; i++ ) {
1210           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1211           else break;
1212         }
1213         imark = i;
1214         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1215         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1216       }
1217       if (idx) {
1218         *idx = idx_p = mat->rowindices;
1219         if (imark > -1) {
1220           for ( i=0; i<imark; i++ ) {
1221             idx_p[i] = cmap[cworkB[i]];
1222           }
1223         } else {
1224           for ( i=0; i<nzB; i++ ) {
1225             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1226             else break;
1227           }
1228           imark = i;
1229         }
1230         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1231         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1232       }
1233     }
1234     else {
1235       if (idx) *idx = 0;
1236       if (v)   *v   = 0;
1237     }
1238   }
1239   *nz = nztot;
1240   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1241   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
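
/*
   Illustrative usage sketch (added note, not part of the original source):
   MatGetRow() on a parallel AIJ matrix may only be called for locally owned
   rows and must be paired with MatRestoreRow(),

       int    nz, *cols, ierr;
       Scalar *vals;

       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... examine the nz entries; cols[] holds global column numbers ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);

   The merged row is built in mat->rowvalues/rowindices, so the returned
   arrays are only valid until the next MatGetRow() call on this matrix.
*/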
1244 
1245 #undef __FUNC__
1246 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1247 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1248 {
1249   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1250 
1251   PetscFunctionBegin;
1252   if (aij->getrowactive == PETSC_FALSE) {
1253     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1254   }
1255   aij->getrowactive = PETSC_FALSE;
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNC__
1260 #define __FUNC__ "MatNorm_MPIAIJ"
1261 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1262 {
1263   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1264   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1265   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1266   double     sum = 0.0;
1267   Scalar     *v;
1268 
1269   PetscFunctionBegin;
1270   if (aij->size == 1) {
1271     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1272   } else {
1273     if (type == NORM_FROBENIUS) {
1274       v = amat->a;
1275       for (i=0; i<amat->nz; i++ ) {
1276 #if defined(USE_PETSC_COMPLEX)
1277         sum += real(conj(*v)*(*v)); v++;
1278 #else
1279         sum += (*v)*(*v); v++;
1280 #endif
1281       }
1282       v = bmat->a;
1283       for (i=0; i<bmat->nz; i++ ) {
1284 #if defined(USE_PETSC_COMPLEX)
1285         sum += real(conj(*v)*(*v)); v++;
1286 #else
1287         sum += (*v)*(*v); v++;
1288 #endif
1289       }
1290       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1291       *norm = sqrt(*norm);
1292     } else if (type == NORM_1) { /* max column norm */
1293       double *tmp, *tmp2;
1294       int    *jj, *garray = aij->garray;
1295       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1296       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1297       PetscMemzero(tmp,aij->N*sizeof(double));
1298       *norm = 0.0;
1299       v = amat->a; jj = amat->j;
1300       for ( j=0; j<amat->nz; j++ ) {
1301         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1302       }
1303       v = bmat->a; jj = bmat->j;
1304       for ( j=0; j<bmat->nz; j++ ) {
1305         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1306       }
1307       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1308       for ( j=0; j<aij->N; j++ ) {
1309         if (tmp2[j] > *norm) *norm = tmp2[j];
1310       }
1311       PetscFree(tmp); PetscFree(tmp2);
1312     } else if (type == NORM_INFINITY) { /* max row norm */
1313       double ntemp = 0.0;
1314       for ( j=0; j<amat->m; j++ ) {
1315         v = amat->a + amat->i[j] + shift;
1316         sum = 0.0;
1317         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1318           sum += PetscAbsScalar(*v); v++;
1319         }
1320         v = bmat->a + bmat->i[j] + shift;
1321         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1322           sum += PetscAbsScalar(*v); v++;
1323         }
1324         if (sum > ntemp) ntemp = sum;
1325       }
1326       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1327     } else {
1328       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1329     }
1330   }
1331   PetscFunctionReturn(0);
1332 }
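
/*
   Added note (not part of the original source): the reductions used above are

       NORM_FROBENIUS : sqrt( MPI_SUM over processors of sum_ij |a_ij|^2 )
       NORM_1         : max over global columns j of sum_i |a_ij|
                        (per-column sums are MPI_SUMed across processors first)
       NORM_INFINITY  : max over global rows i of sum_j |a_ij|
                        (row sums are complete locally, then MPI_MAXed)

   The 1-norm path allocates work arrays of length N on every processor,
   which, like colmap, does not scale with the number of global columns.
*/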
1333 
1334 #undef __FUNC__
1335 #define __FUNC__ "MatTranspose_MPIAIJ"
1336 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1337 {
1338   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1339   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1340   int        ierr,shift = Aloc->indexshift;
1341   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1342   Mat        B;
1343   Scalar     *array;
1344 
1345   PetscFunctionBegin;
1346   if (matout == PETSC_NULL && M != N) {
1347     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1348   }
1349 
1350   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1351 
1352   /* copy over the A part */
1353   Aloc = (Mat_SeqAIJ*) a->A->data;
1354   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1355   row = a->rstart;
1356   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1357   for ( i=0; i<m; i++ ) {
1358     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1359     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1360   }
1361   aj = Aloc->j;
1362   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1363 
1364   /* copy over the B part */
1365   Aloc = (Mat_SeqAIJ*) a->B->data;
1366   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1367   row = a->rstart;
1368   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1369   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1370   for ( i=0; i<m; i++ ) {
1371     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1372     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1373   }
1374   PetscFree(ct);
1375   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1376   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1377   if (matout != PETSC_NULL) {
1378     *matout = B;
1379   } else {
1380     PetscOps       *Abops;
1381     struct _MatOps *Aops;
1382 
1383     /* This isn't really an in-place transpose, but we free the data structures from a */
1384     PetscFree(a->rowners);
1385     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1386     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1387     if (a->colmap) PetscFree(a->colmap);
1388     if (a->garray) PetscFree(a->garray);
1389     if (a->lvec) VecDestroy(a->lvec);
1390     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1391     PetscFree(a);
1392 
1393        This is horrible, horrible code. We need to keep
1394       A's pointers for bops and ops but copy everything
1395       else from B.
1396       else from C.
1397     */
1398     Abops = A->bops;
1399     Aops  = A->ops;
1400     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1401     A->bops = Abops;
1402     A->ops  = Aops;
1403     PetscHeaderDestroy(B);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNC__
1409 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1410 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1411 {
1412   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1413   Mat a = aij->A, b = aij->B;
1414   int ierr,s1,s2,s3;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1418   if (rr) {
1419     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1420     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1421     /* Overlap communication with computation. */
1422     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1423   }
1424   if (ll) {
1425     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1426     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1427     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1428   }
1429   /* scale the diagonal block */
1430   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1431 
1432   if (rr) {
1433     /* Do a scatter end and then right scale the off-diagonal block */
1434     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1435     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1436   }
1437 
1438   PetscFunctionReturn(0);
1439 }
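
/*
   Usage sketch for the routine above (illustrative only; A, l and r are assumed to
   be an assembled matrix and two conforming vectors).  MatDiagonalScale() forms
   A <- diag(l) A diag(r); either vector may be PETSC_NULL to skip the corresponding
   left or right scaling, matching the ll/rr checks above.

       ierr = MatDiagonalScale(A,l,r);          CHKERRQ(ierr);    row and column scaling
       ierr = MatDiagonalScale(A,PETSC_NULL,r); CHKERRQ(ierr);    column (right) scaling only
*/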
1440 
1441 
1442 extern int MatPrintHelp_SeqAIJ(Mat);
1443 #undef __FUNC__
1444 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1445 int MatPrintHelp_MPIAIJ(Mat A)
1446 {
1447   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1448   int        ierr;
1449 
1450   PetscFunctionBegin;
1451   if (!a->rank) {
1452     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNC__
1458 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1459 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1460 {
1461   PetscFunctionBegin;
1462   *bs = 1;
1463   PetscFunctionReturn(0);
1464 }
1465 #undef __FUNC__
1466 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1467 int MatSetUnfactored_MPIAIJ(Mat A)
1468 {
1469   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1470   int        ierr;
1471 
1472   PetscFunctionBegin;
1473   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNC__
1478 #define __FUNC__ "MatEqual_MPIAIJ"
1479 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1480 {
1481   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1482   Mat        a, b, c, d;
1483   PetscTruth flg;
1484   int        ierr;
1485 
1486   PetscFunctionBegin;
1487   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1488   a = matA->A; b = matA->B;
1489   c = matB->A; d = matB->B;
1490 
1491   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1492   if (flg == PETSC_TRUE) {
1493     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1494   }
1495   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1500 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1501 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1502 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1503 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1504 
1505 /* -------------------------------------------------------------------*/
1506 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1507        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1508        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1509        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1510        0,0,
1511        0,0,
1512        0,0,
1513        MatRelax_MPIAIJ,
1514        MatTranspose_MPIAIJ,
1515        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1516        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1517        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1518        0,
1519        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1520        0,0,0,0,
1521        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1522        0,0,
1523        0,0,MatConvertSameType_MPIAIJ,0,0,
1524        0,0,0,
1525        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1526        MatPrintHelp_MPIAIJ,
1527        MatScale_MPIAIJ,0,0,0,
1528        MatGetBlockSize_MPIAIJ,0,0,0,0,
1529        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1530        0,0,MatGetSubMatrix_MPIAIJ};
1531 
1532 
1533 #undef __FUNC__
1534 #define __FUNC__ "MatCreateMPIAIJ"
1535 /*@C
1536    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1537    (the default parallel PETSc format).  For good matrix assembly performance
1538    the user should preallocate the matrix storage by setting the parameters
1539    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1540    performance can be increased by more than a factor of 50.
1541 
1542    Collective on MPI_Comm
1543 
1544    Input Parameters:
1545 +  comm - MPI communicator
1546 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1547            This value should be the same as the local size used in creating the
1548            y vector for the matrix-vector product y = Ax.
1549 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1550        This value should be the same as the local size used in creating the
1551        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1552 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1553 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1554 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1555            (same for all local rows)
1556 .  d_nnz - array containing the number of nonzeros in the various rows of the
1557            diagonal portion of the local submatrix (possibly different for each row)
1558            or PETSC_NULL. You must leave room for the diagonal entry even if
1559            it is zero.
1560 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1561            submatrix (same for all local rows).
1562 -  o_nnz - array containing the number of nonzeros in the various rows of the
1563            off-diagonal portion of the local submatrix (possibly different for
1564            each row) or PETSC_NULL.
1565 
1566    Output Parameter:
1567 .  A - the matrix
1568 
1569    Notes:
1570    The AIJ format (also called the Yale sparse matrix format or
1571    compressed row storage) is fully compatible with standard Fortran 77
1572    storage.  That is, the stored row and column indices can begin at
1573    either one (as in Fortran) or zero.  See the users manual for details.
1574 
1575    The user MUST specify either the local or global matrix dimensions
1576    (possibly both).
1577 
1578    By default, this format uses inodes (identical nodes) when possible.
1579    We search for consecutive rows with the same nonzero structure, thereby
1580    reusing matrix information to achieve increased efficiency.
1581 
1582    Options Database Keys:
1583 +  -mat_aij_no_inode  - Do not use inodes
1584 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1585 -  -mat_aij_oneindex - Internally use indexing starting at 1
1586         rather than 0.  Note that when calling MatSetValues(),
1587         the user still MUST index entries starting at 0!
1588 
1589    Storage Information:
1590    For a square global matrix we define each processor's diagonal portion
1591    to be its local rows and the corresponding columns (a square submatrix);
1592    each processor's off-diagonal portion encompasses the remainder of the
1593    local matrix (a rectangular submatrix).
1594 
1595    The user can specify preallocated storage for the diagonal part of
1596    the local submatrix with either d_nz or d_nnz (not both).  Set
1597    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1598    memory allocation.  Likewise, specify preallocated storage for the
1599    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1600 
1601    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1602    the figure below we depict these three local rows and all columns (0-11).
1603 
1604 .vb
1605              0 1 2 3 4 5 6 7 8 9 10 11
1606             -------------------
1607      row 3  |  o o o d d d o o o o o o
1608      row 4  |  o o o d d d o o o o o o
1609      row 5  |  o o o d d d o o o o o o
1610             -------------------
1611 .ve
1612 
1613    Thus, any entries in the d locations are stored in the d (diagonal)
1614    submatrix, and any entries in the o locations are stored in the
1615    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1616    stored simply in the MATSEQAIJ format for compressed row storage.
1617 
1618    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1619    and o_nz should indicate the number of nonzeros per row in the o matrix.
1620    In general, for PDE problems in which most nonzeros are near the diagonal,
1621    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1622    or you will get TERRIBLE performance; see the users' manual chapter on
1623    matrices.
1624 
1625 .keywords: matrix, aij, compressed row, sparse, parallel
1626 
1627 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1628 @*/
1629 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1630 {
1631   Mat          B;
1632   Mat_MPIAIJ   *b;
1633   int          ierr, i,sum[2],work[2],size;
1634 
1635   PetscFunctionBegin;
1636   MPI_Comm_size(comm,&size);
1637   if (size == 1) {
1638     if (M == PETSC_DECIDE) M = m;
1639     if (N == PETSC_DECIDE) N = n;
1640     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1641     PetscFunctionReturn(0);
1642   }
1643 
1644   *A = 0;
1645   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1646   PLogObjectCreate(B);
1647   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1648   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1649   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1650   B->ops->destroy    = MatDestroy_MPIAIJ;
1651   B->ops->view       = MatView_MPIAIJ;
1652   B->factor     = 0;
1653   B->assembled  = PETSC_FALSE;
1654   B->mapping    = 0;
1655 
1656   B->insertmode = NOT_SET_VALUES;
1657   b->size       = size;
1658   MPI_Comm_rank(comm,&b->rank);
1659 
1660   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1661     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1662   }
1663 
1664   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1665     work[0] = m; work[1] = n;
1666     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1667     if (M == PETSC_DECIDE) M = sum[0];
1668     if (N == PETSC_DECIDE) N = sum[1];
1669   }
1670   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1671   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1672   b->m = m; B->m = m;
1673   b->n = n; B->n = n;
1674   b->N = N; B->N = N;
1675   b->M = M; B->M = M;
1676 
1677   /* build local table of row and column ownerships */
1678   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1679   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1680   b->cowners = b->rowners + b->size + 2;
1681   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1682   b->rowners[0] = 0;
1683   for ( i=2; i<=b->size; i++ ) {
1684     b->rowners[i] += b->rowners[i-1];
1685   }
1686   b->rstart = b->rowners[b->rank];
1687   b->rend   = b->rowners[b->rank+1];
1688   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1689   b->cowners[0] = 0;
1690   for ( i=2; i<=b->size; i++ ) {
1691     b->cowners[i] += b->cowners[i-1];
1692   }
1693   b->cstart = b->cowners[b->rank];
1694   b->cend   = b->cowners[b->rank+1];
1695 
1696   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1697   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1698   PLogObjectParent(B,b->A);
1699   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1700   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1701   PLogObjectParent(B,b->B);
1702 
1703   /* build cache (stash) for entries destined for rows owned by other processors */
1704   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1705   b->donotstash  = 0;
1706   b->colmap      = 0;
1707   b->garray      = 0;
1708   b->roworiented = 1;
1709 
1710   /* stuff used for matrix vector multiply */
1711   b->lvec      = 0;
1712   b->Mvctx     = 0;
1713 
1714   /* stuff for MatGetRow() */
1715   b->rowindices   = 0;
1716   b->rowvalues    = 0;
1717   b->getrowactive = PETSC_FALSE;
1718 
1719   *A = B;
1720   PetscFunctionReturn(0);
1721 }
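
/*
   Usage sketch for MatCreateMPIAIJ() above (illustrative only; the sizes and
   preallocation counts are assumptions, not taken from this file).  Each process
   supplies its local row/column sizes and rough per-row nonzero counts for the
   diagonal and off-diagonal blocks, then fills the matrix with MatSetValues().

       Mat A;
       int ierr,m = 100,n = 100;                 local sizes owned by this process
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,
                              5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
       (set entries with MatSetValues(), using INSERT_VALUES or ADD_VALUES)
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);   CHKERRQ(ierr);
*/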
1722 
1723 #undef __FUNC__
1724 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1725 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1726 {
1727   Mat        mat;
1728   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1729   int        ierr, len=0, flg;
1730 
1731   PetscFunctionBegin;
1732   *newmat       = 0;
1733   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1734   PLogObjectCreate(mat);
1735   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1736   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1737   mat->ops->destroy    = MatDestroy_MPIAIJ;
1738   mat->ops->view       = MatView_MPIAIJ;
1739   mat->factor     = matin->factor;
1740   mat->assembled  = PETSC_TRUE;
1741 
1742   a->m = mat->m   = oldmat->m;
1743   a->n = mat->n   = oldmat->n;
1744   a->M = mat->M   = oldmat->M;
1745   a->N = mat->N   = oldmat->N;
1746 
1747   a->rstart       = oldmat->rstart;
1748   a->rend         = oldmat->rend;
1749   a->cstart       = oldmat->cstart;
1750   a->cend         = oldmat->cend;
1751   a->size         = oldmat->size;
1752   a->rank         = oldmat->rank;
1753   mat->insertmode = NOT_SET_VALUES;
1754   a->rowvalues    = 0;
1755   a->getrowactive = PETSC_FALSE;
1756 
1757   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1758   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1759   a->cowners = a->rowners + a->size + 2;
1760   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1761   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1762   if (oldmat->colmap) {
1763     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1764     PLogObjectMemory(mat,(a->N)*sizeof(int));
1765     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1766   } else a->colmap = 0;
1767   if (oldmat->garray) {
1768     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1769     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1770     PLogObjectMemory(mat,len*sizeof(int));
1771     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1772   } else a->garray = 0;
1773 
1774   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1775   PLogObjectParent(mat,a->lvec);
1776   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1777   PLogObjectParent(mat,a->Mvctx);
1778   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1779   PLogObjectParent(mat,a->A);
1780   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1781   PLogObjectParent(mat,a->B);
1782   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1785   }
1786   *newmat = mat;
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #include "sys.h"
1791 
1792 #undef __FUNC__
1793 #define __FUNC__ "MatLoad_MPIAIJ"
1794 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1795 {
1796   Mat          A;
1797   Scalar       *vals,*svals;
1798   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1799   MPI_Status   status;
1800   int          i, nz, ierr, j,rstart, rend, fd;
1801   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1802   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1803   int          tag = ((PetscObject)viewer)->tag;
1804 
1805   PetscFunctionBegin;
1806   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1807   if (!rank) {
1808     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1809     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1810     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1811     if (header[3] < 0) {
1812       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1813     }
1814   }
1815 
1816 
1817   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1818   M = header[1]; N = header[2];
1819   /* determine ownership of all rows */
1820   m = M/size + ((M % size) > rank);
1821   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1822   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1823   rowners[0] = 0;
1824   for ( i=2; i<=size; i++ ) {
1825     rowners[i] += rowners[i-1];
1826   }
1827   rstart = rowners[rank];
1828   rend   = rowners[rank+1];
1829 
1830   /* distribute row lengths to all processors */
1831   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1832   offlens = ourlens + (rend-rstart);
1833   if (!rank) {
1834     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1835     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1836     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1837     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1838     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1839     PetscFree(sndcounts);
1840   } else {
1841     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1842   }
1843 
1844   if (!rank) {
1845     /* calculate the number of nonzeros on each processor */
1846     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1847     PetscMemzero(procsnz,size*sizeof(int));
1848     for ( i=0; i<size; i++ ) {
1849       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1850         procsnz[i] += rowlengths[j];
1851       }
1852     }
1853     PetscFree(rowlengths);
1854 
1855     /* determine max buffer needed and allocate it */
1856     maxnz = 0;
1857     for ( i=0; i<size; i++ ) {
1858       maxnz = PetscMax(maxnz,procsnz[i]);
1859     }
1860     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1861 
1862     /* read in my part of the matrix column indices  */
1863     nz     = procsnz[0];
1864     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1865     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1866 
1867     /* read in everyone else's portion and ship it off */
1868     for ( i=1; i<size; i++ ) {
1869       nz   = procsnz[i];
1870       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1871       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1872     }
1873     PetscFree(cols);
1874   } else {
1875     /* determine buffer space needed for message */
1876     nz = 0;
1877     for ( i=0; i<m; i++ ) {
1878       nz += ourlens[i];
1879     }
1880     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1881 
1882     /* receive message of column indices */
1883     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1884     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1885     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1886   }
1887 
1888   /* loop over local rows, determining the number of off-diagonal entries */
1889   PetscMemzero(offlens,m*sizeof(int));
1890   jj = 0;
1891   for ( i=0; i<m; i++ ) {
1892     for ( j=0; j<ourlens[i]; j++ ) {
1893       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1894       jj++;
1895     }
1896   }
1897 
1898   /* create our matrix */
1899   for ( i=0; i<m; i++ ) {
1900     ourlens[i] -= offlens[i];
1901   }
1902   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1903   A = *newmat;
1904   MatSetOption(A,MAT_COLUMNS_SORTED);
1905   for ( i=0; i<m; i++ ) {
1906     ourlens[i] += offlens[i];
1907   }
1908 
1909   if (!rank) {
1910     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1911 
1912     /* read in my part of the matrix numerical values  */
1913     nz = procsnz[0];
1914     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1915 
1916     /* insert into matrix */
1917     jj      = rstart;
1918     smycols = mycols;
1919     svals   = vals;
1920     for ( i=0; i<m; i++ ) {
1921       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1922       smycols += ourlens[i];
1923       svals   += ourlens[i];
1924       jj++;
1925     }
1926 
1927     /* read in the other processors' values and ship them out */
1928     for ( i=1; i<size; i++ ) {
1929       nz   = procsnz[i];
1930       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1931       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1932     }
1933     PetscFree(procsnz);
1934   } else {
1935     /* receive numeric values */
1936     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1937 
1938     /* receive message of values */
1939     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1940     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1941     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1942 
1943     /* insert into matrix */
1944     jj      = rstart;
1945     smycols = mycols;
1946     svals   = vals;
1947     for ( i=0; i<m; i++ ) {
1948       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1949       smycols += ourlens[i];
1950       svals   += ourlens[i];
1951       jj++;
1952     }
1953   }
1954   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1955 
1956   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1957   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
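
/*
   Usage sketch for the loader above (illustrative; the viewer-opening call is an
   assumption about the binary viewer interface of this PETSc generation).

       Viewer viewer;
       Mat    A;
       int    ierr;
       ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);

   As read above, the file must begin with a four-int header (a cookie equal to
   MAT_COOKIE, the global row count M, the global column count N, and a format
   flag that must be nonnegative here), followed by the M row lengths, then all
   column indices, and finally the numerical values.
*/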
1960 
1961 #undef __FUNC__
1962 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1963 /*
1964     Not great, since it makes two copies of the submatrix: first a local SeqAIJ
1965   copy on each process, and then the final result formed by concatenating the
1966   local matrices.  Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
1967 */
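/*
   Usage sketch (illustrative; mat, the index sets, and ierr are assumed to exist):
   extract a parallel submatrix once with MAT_INITIAL_MATRIX, then refresh it with
   MAT_REUSE_MATRIX after the values of mat change, letting csize = PETSC_DECIDE
   pick the column layout.

       IS  isrow,iscol;
       Mat sub;
       (build isrow and iscol, e.g. with ISCreateStride() or ISCreateGeneral())
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);   CHKERRQ(ierr);
*/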
1968 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1969 {
1970   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1971   Mat        *local,M, Mreuse;
1972   Scalar     *vwork,*aa;
1973   MPI_Comm   comm = mat->comm;
1974   Mat_SeqAIJ *aij;
1975   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1976 
1977   PetscFunctionBegin;
1978   MPI_Comm_rank(comm,&rank);
1979   MPI_Comm_size(comm,&size);
1980 
1981   if (call ==  MAT_REUSE_MATRIX) {
1982     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1983     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not created by a previous MAT_INITIAL_MATRIX call, cannot reuse");
1984     local = &Mreuse;
1985     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1986   } else {
1987     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1988     Mreuse = *local;
1989     PetscFree(local);
1990   }
1991 
1992   /*
1993       m - number of local rows
1994       n - number of columns (same on all processors)
1995       rstart - first row in new global matrix generated
1996   */
1997   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1998   if (call == MAT_INITIAL_MATRIX) {
1999     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2000     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2001     ii  = aij->i;
2002     jj  = aij->j;
2003 
2004     /*
2005         Determine the number of non-zeros in the diagonal and off-diagonal
2006         portions of the matrix in order to do correct preallocation
2007     */
2008 
2009     /* first get start and end of "diagonal" columns */
2010     if (csize == PETSC_DECIDE) {
2011       nlocal = n/size + ((n % size) > rank);
2012     } else {
2013       nlocal = csize;
2014     }
2015     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2016     rstart = rend - nlocal;
2017     if (rank == size - 1 && rend != n) {
2018       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2019     }
2020 
2021     /* next, compute all the lengths */
2022     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2023     olens = dlens + m;
2024     for ( i=0; i<m; i++ ) {
2025       jend = ii[i+1] - ii[i];
2026       olen = 0;
2027       dlen = 0;
2028       for ( j=0; j<jend; j++ ) {
2029         if ( *jj < rstart || *jj >= rend) olen++;
2030         else dlen++;
2031         jj++;
2032       }
2033       olens[i] = olen;
2034       dlens[i] = dlen;
2035     }
2036     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2037     PetscFree(dlens);
2038   } else {
2039     int ml,nl;
2040 
2041     M = *newmat;
2042     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2043     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2044     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2045     /*
2046          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2047        rather than the slower MatSetValues().
2048     */
2049     M->was_assembled = PETSC_TRUE;
2050     M->assembled     = PETSC_FALSE;
2051   }
2052   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2053   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2054   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2055   ii  = aij->i;
2056   jj  = aij->j;
2057   aa  = aij->a;
2058   for (i=0; i<m; i++) {
2059     row   = rstart + i;
2060     nz    = ii[i+1] - ii[i];
2061     cwork = jj;     jj += nz;
2062     vwork = aa;     aa += nz;
2063     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2064   }
2065 
2066   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2067   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2068   *newmat = M;
2069 
2070   /* save the submatrix used on this processor for the next (MAT_REUSE_MATRIX) request */
2071   if (call ==  MAT_INITIAL_MATRIX) {
2072     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2073     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2074   }
2075 
2076   PetscFunctionReturn(0);
2077 }
2078