xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 74cdf0df7c4b0e39a3e5b5b72d8b6c3b3328fa68)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.241 1998/05/06 15:28:09 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global number of columns in the matrix.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
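
/*
   Illustration (not part of the library): if the off-diagonal block B has
   local columns whose global indices are garray[] = {3, 17, 42}, then after
   CreateColmap_MPIAIJ_Private() the (global-length) colmap array satisfies
   colmap[3] == 1, colmap[17] == 2, colmap[42] == 3, and colmap[g] == 0 for
   every other global column g.  Callers recover the local column as
   lcol = colmap[g] - 1 and treat lcol < 0 as "global column g not present
   in B on this process".
*/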
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
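
/*
   The two macros below insert one (row,col,value) entry into the local
   sequential AIJ storage of either the diagonal block (aij->A) or the
   off-diagonal block (aij->B).  Each row is kept sorted by column; when a
   row runs out of preallocated space the entire local storage is
   reallocated, giving the current row CHUNKSIZE additional entries.  The
   macros are used instead of function calls for speed and rely on the local
   variables (rp, ap, rmax, nrow, low, high, t, _i, ii, N, col1, nonew,
   shift, and the CSR pointers ai,aj,aa,aimax,ailen or bi,bj,ba,bimax,bilen)
   declared in MatSetValues_MPIAIJ() below.
*/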
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col) high = t; \
46       else             low  = t; \
47     } \
48       for ( _i=0; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col) high = t; \
117       else             low  = t; \
118     } \
119        for ( _i=0; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some variables required by the macros above */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
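
/*
   Usage sketch (illustrative only; the matrix C and its sizes are made up,
   not part of this file).  Values whose row is locally owned go directly
   into aij->A or aij->B above; rows owned by other processes are stashed
   here and communicated during assembly:

      Mat    C;
      int    ierr,row,col,rstart,rend;
      Scalar v = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                             100,100,5,PETSC_NULL,2,PETSC_NULL,&C);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
      row  = rstart; col = (rstart + 57) % 100;
      ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/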
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     } else {
294       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
295     }
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNC__
301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
303 {
304   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
305   MPI_Comm    comm = mat->comm;
306   int         size = aij->size, *owners = aij->rowners;
307   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
308   MPI_Request *send_waits,*recv_waits;
309   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
310   InsertMode  addv;
311   Scalar      *rvalues,*svalues;
312 
313   PetscFunctionBegin;
314   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
315   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
316   if (addv == (ADD_VALUES|INSERT_VALUES)) {
317     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
318   }
319   mat->insertmode = addv; /* in case this processor had no cache */
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
325   for ( i=0; i<aij->stash.n; i++ ) {
326     idx = aij->stash.idx[i];
327     for ( j=0; j<size; j++ ) {
328       if (idx >= owners[j] && idx < owners[j+1]) {
329         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
330       }
331     }
332   }
333   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
334 
335   /* inform other processors of number of messages and max length*/
336   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
337   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
338   nreceives = work[rank];
339   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
340   nmax = work[rank];
341   PetscFree(work);
342 
343   /* post receives:
344        1) each message will consist of ordered triples
345      (global row index, global column index, value); the indices are
346      stored as Scalars (doubles) to simplify the message passing.
347        2) since we don't know how long each individual message is, we
348      allocate the largest needed buffer for each receive. Potentially
349      this is a lot of wasted space.
350 
351 
352        This could be done better.
353   */
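
  /*
     Layout of each message (as packed a few lines below): entry k occupies
     three consecutive Scalars,
        svalues[3*k]   = (Scalar) global row index
        svalues[3*k+1] = (Scalar) global column index
        svalues[3*k+2] = numerical value
     so a message carrying n entries is 3*n Scalars long and the receiver
     recovers n as MPI_Get_count()/3 in MatAssemblyEnd_MPIAIJ().
  */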
354   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
355   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
356   for ( i=0; i<nreceives; i++ ) {
357     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
358               comm,recv_waits+i);CHKERRQ(ierr);
359   }
360 
361   /* do sends:
362       1) starts[i] gives the starting index in svalues for stuff going to
363          the ith processor
364   */
365   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
366   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
367   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
368   starts[0]  = 0;
369   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
370   for ( i=0; i<aij->stash.n; i++ ) {
371     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
372     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
373     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
374   }
375   PetscFree(owner);
376   starts[0] = 0;
377   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   count = 0;
379   for ( i=0; i<size; i++ ) {
380     if (procs[i]) {
381       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
382     }
383   }
384   PetscFree(starts); PetscFree(nprocs);
385 
386   /* Free cache space */
387   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
388   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
389 
390   aij->svalues    = svalues;    aij->rvalues    = rvalues;
391   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
392   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
393   aij->rmax       = nmax;
394 
395   PetscFunctionReturn(0);
396 }
397 extern int MatSetUpMultiply_MPIAIJ(Mat);
398 
399 #undef __FUNC__
400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
402 {
403   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
404   MPI_Status  *send_status,recv_status;
405   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
406   int         row,col,other_disassembled;
407   Scalar      *values,val;
408   InsertMode  addv = mat->insertmode;
409 
410   PetscFunctionBegin;
411   /*  wait on receives */
412   while (count) {
413     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
414     /* unpack receives into our local space */
415     values = aij->rvalues + 3*imdex*aij->rmax;
416     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
417     n = n/3;
418     for ( i=0; i<n; i++ ) {
419       row = (int) PetscReal(values[3*i]) - aij->rstart;
420       col = (int) PetscReal(values[3*i+1]);
421       val = values[3*i+2];
422       if (col >= aij->cstart && col < aij->cend) {
423         col -= aij->cstart;
424         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
425       } else {
426         if (mat->was_assembled || mat->assembled) {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
429           }
430           col = aij->colmap[col] - 1;
431           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
432             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
433             col = (int) PetscReal(values[3*i+1]);
434           }
435         }
436         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
437       }
438     }
439     count--;
440   }
441   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
442 
443   /* wait on sends */
444   if (aij->nsends) {
445     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
446     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
447     PetscFree(send_status);
448   }
449   PetscFree(aij->send_waits); PetscFree(aij->svalues);
450 
451   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
452   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
453 
454   /* determine if any processor has disassembled, if so we must
455      also disassemble ourselves, in order that we may reassemble. */
456   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
457   if (mat->was_assembled && !other_disassembled) {
458     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
459   }
460 
461   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
462     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
463   }
464   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
465   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
466 
467   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatZeroEntries_MPIAIJ"
473 int MatZeroEntries_MPIAIJ(Mat A)
474 {
475   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
476   int        ierr;
477 
478   PetscFunctionBegin;
479   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
480   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNC__
485 #define __FUNC__ "MatZeroRows_MPIAIJ"
486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
487 {
488   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
489   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
490   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
491   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
492   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
493   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
494   MPI_Comm       comm = A->comm;
495   MPI_Request    *send_waits,*recv_waits;
496   MPI_Status     recv_status,*send_status;
497   IS             istmp;
498   PetscTruth     localdiag;
499 
500   PetscFunctionBegin;
501   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
502   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
503 
504   /*  first count number of contributors to each processor */
505   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
506   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
507   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
508   for ( i=0; i<N; i++ ) {
509     idx = rows[i];
510     found = 0;
511     for ( j=0; j<size; j++ ) {
512       if (idx >= owners[j] && idx < owners[j+1]) {
513         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
514       }
515     }
516     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
517   }
518   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
519 
520   /* inform other processors of number of messages and max length*/
521   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
522   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
523   nrecvs = work[rank];
524   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
525   nmax = work[rank];
526   PetscFree(work);
527 
528   /* post receives:   */
529   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
530   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
531   for ( i=0; i<nrecvs; i++ ) {
532     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
533   }
534 
535   /* do sends:
536       1) starts[i] gives the starting index in svalues for stuff going to
537          the ith processor
538   */
539   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
540   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
541   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
542   starts[0] = 0;
543   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
544   for ( i=0; i<N; i++ ) {
545     svalues[starts[owner[i]]++] = rows[i];
546   }
547   ISRestoreIndices(is,&rows);
548 
549   starts[0] = 0;
550   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
551   count = 0;
552   for ( i=0; i<size; i++ ) {
553     if (procs[i]) {
554       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
555     }
556   }
557   PetscFree(starts);
558 
559   base = owners[rank];
560 
561   /*  wait on receives */
562   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
563   source = lens + nrecvs;
564   count  = nrecvs; slen = 0;
565   while (count) {
566     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
567     /* unpack receives into our local space */
568     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
569     source[imdex]  = recv_status.MPI_SOURCE;
570     lens[imdex]  = n;
571     slen += n;
572     count--;
573   }
574   PetscFree(recv_waits);
575 
576   /* move the received row indices into local numbering */
577   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
578   count = 0;
579   for ( i=0; i<nrecvs; i++ ) {
580     values = rvalues + i*nmax;
581     for ( j=0; j<lens[i]; j++ ) {
582       lrows[count++] = values[j] - base;
583     }
584   }
585   PetscFree(rvalues); PetscFree(lens);
586   PetscFree(owner); PetscFree(nprocs);
587 
588   /* actually zap the local rows */
589   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
590   PLogObjectParent(A,istmp);
591 
592   /*
593         Zero the required rows. If the "diagonal block" of the matrix
594      is square and the user wishes to set the diagonal, we use separate
595      code so that MatSetValues() is not called for each diagonal entry,
596      which would allocate new memory and thus cause many mallocs, slowing things down.
597 
598        Contributed by: Mathew Knepley
599   */
600   localdiag = PETSC_FALSE;
601   if (diag && (l->A->M == l->A->N)) {
602     localdiag = PETSC_TRUE;
603     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
604   } else {
605     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
606   }
607   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
608   ierr = ISDestroy(istmp); CHKERRQ(ierr);
609 
610   if (diag && (localdiag == PETSC_FALSE)) {
611     for ( i = 0; i < slen; i++ ) {
612       row = lrows[i] + rstart;
613       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
614     }
615     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
616     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
617   }
618 
629   PetscFree(lrows);
630 
631   /* wait on sends */
632   if (nsends) {
633     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
634     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
635     PetscFree(send_status);
636   }
637   PetscFree(send_waits); PetscFree(svalues);
638 
639   PetscFunctionReturn(0);
640 }
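
/*
   Usage sketch (illustrative only; the matrix C is assumed to be an
   assembled MPIAIJ matrix): zero two locally owned rows and place 2.0 on
   their diagonal.  A process may also list rows owned by other processes;
   those are forwarded with the sends/receives above.

      IS     is;
      int    ierr,rows[2],rstart,rend;
      Scalar diag = 2.0;

      ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
      rows[0] = rstart; rows[1] = rstart + 1;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(C,is,&diag);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/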
641 
642 #undef __FUNC__
643 #define __FUNC__ "MatMult_MPIAIJ"
644 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
645 {
646   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
647   int        ierr,nt;
648 
649   PetscFunctionBegin;
650   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
651   if (nt != a->n) {
652     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
653   }
654   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
655   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
656   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
657   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
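
/*
   Usage sketch (illustrative only; C is assumed to be an assembled MPIAIJ
   matrix): the input vector must share the matrix column partitioning
   (local length a->n) and the output vector the row partitioning (local
   length a->m):

      Vec x,y;
      int ierr,m,n;

      ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr);
      ierr = VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x);CHKERRQ(ierr);
      ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DECIDE,&y);CHKERRQ(ierr);
      (fill x with VecSet() or VecSetValues(), then)
      ierr = MatMult(C,x,y);CHKERRQ(ierr);
*/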
660 
661 #undef __FUNC__
662 #define __FUNC__ "MatMultAdd_MPIAIJ"
663 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
664 {
665   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
666   int        ierr;
667 
668   PetscFunctionBegin;
669   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
670   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
671   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
672   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNC__
677 #define __FUNC__ "MatMultTrans_MPIAIJ"
678 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
679 {
680   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
681   int        ierr;
682 
683   PetscFunctionBegin;
684   /* do nondiagonal part */
685   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
686   /* send it on its way */
687   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
688   /* do local part */
689   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
690   /* receive remote parts: note this assumes the values are not actually */
691   /* inserted in yy until the next line, which is true for this implementation */
692   /* but may not always be true. */
693   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNC__
698 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
699 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
700 {
701   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
702   int        ierr;
703 
704   PetscFunctionBegin;
705   /* do nondiagonal part */
706   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
707   /* send it on its way */
708   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
709   /* do local part */
710   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
711   /* receive remote parts: note this assumes the values are not actually */
712   /* inserted in zz until the next line, which is true for this implementation */
713   /* but may not always be true. */
714   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 /*
719   This only works correctly for square matrices where the subblock A->A is the
720    diagonal block
721 */
722 #undef __FUNC__
723 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
724 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
725 {
726   int        ierr;
727   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
728 
729   PetscFunctionBegin;
730   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
731   if (a->rstart != a->cstart || a->rend != a->cend) {
732     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
733   }
734   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNC__
739 #define __FUNC__ "MatScale_MPIAIJ"
740 int MatScale_MPIAIJ(Scalar *aa,Mat A)
741 {
742   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
743   int        ierr;
744 
745   PetscFunctionBegin;
746   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
747   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNC__
752 #define __FUNC__ "MatDestroy_MPIAIJ"
753 int MatDestroy_MPIAIJ(Mat mat)
754 {
755   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
756   int        ierr;
757 
758   PetscFunctionBegin;
759 #if defined(USE_PETSC_LOG)
760   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
761 #endif
762   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
763   PetscFree(aij->rowners);
764   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
765   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
766   if (aij->colmap) PetscFree(aij->colmap);
767   if (aij->garray) PetscFree(aij->garray);
768   if (aij->lvec)   VecDestroy(aij->lvec);
769   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
770   if (aij->rowvalues) PetscFree(aij->rowvalues);
771   PetscFree(aij);
772   PLogObjectDestroy(mat);
773   PetscHeaderDestroy(mat);
774   PetscFunctionReturn(0);
775 }
776 
777 #undef __FUNC__
778 #define __FUNC__ "MatView_MPIAIJ_Binary"
779 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
780 {
781   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
782   int         ierr;
783 
784   PetscFunctionBegin;
785   if (aij->size == 1) {
786     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
787   }
788   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNC__
793 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
794 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
795 {
796   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
797   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
798   int         ierr, format,shift = C->indexshift,rank;
799   FILE        *fd;
800   ViewerType  vtype;
801 
802   PetscFunctionBegin;
803   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
804   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
805     ierr = ViewerGetFormat(viewer,&format);
806     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
807       MatInfo info;
808       int     flg;
809       MPI_Comm_rank(mat->comm,&rank);
810       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
811       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
812       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
813       PetscSequentialPhaseBegin(mat->comm,1);
814       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
815          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
816       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
817          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
818       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
819       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
820       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
821       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
822       fflush(fd);
823       PetscSequentialPhaseEnd(mat->comm,1);
824       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
825       PetscFunctionReturn(0);
826     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
827       PetscFunctionReturn(0);
828     }
829   }
830 
831   if (vtype == DRAW_VIEWER) {
832     Draw       draw;
833     PetscTruth isnull;
834     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
835     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
836   }
837 
838   if (vtype == ASCII_FILE_VIEWER) {
839     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
840     PetscSequentialPhaseBegin(mat->comm,1);
841     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
842            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
843            aij->cend);
844     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
845     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
846     fflush(fd);
847     PetscSequentialPhaseEnd(mat->comm,1);
848   } else {
849     int size = aij->size;
850     rank = aij->rank;
851     if (size == 1) {
852       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
853     } else {
854       /* assemble the entire matrix onto first processor. */
855       Mat         A;
856       Mat_SeqAIJ *Aloc;
857       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
858       Scalar      *a;
859 
860       if (!rank) {
861         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
862       } else {
863         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
864       }
865       PLogObjectParent(mat,A);
866 
867       /* copy over the A part */
868       Aloc = (Mat_SeqAIJ*) aij->A->data;
869       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
870       row = aij->rstart;
871       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
872       for ( i=0; i<m; i++ ) {
873         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
874         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
875       }
876       aj = Aloc->j;
877       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
878 
879       /* copy over the B part */
880       Aloc = (Mat_SeqAIJ*) aij->B->data;
881       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
882       row = aij->rstart;
883       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
884       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
885       for ( i=0; i<m; i++ ) {
886         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
887         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
888       }
889       PetscFree(ct);
890       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
891       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
892       /*
893          Everyone has to call MatView() to draw the matrix since the graphics waits are
894          synchronized across all processors that share the Draw object
895       */
896       if (!rank || vtype == DRAW_VIEWER) {
897         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
898       }
899       ierr = MatDestroy(A); CHKERRQ(ierr);
900     }
901   }
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNC__
906 #define __FUNC__ "MatView_MPIAIJ"
907 int MatView_MPIAIJ(Mat mat,Viewer viewer)
908 {
909   int         ierr;
910   ViewerType  vtype;
911 
912   PetscFunctionBegin;
913   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
914   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
915       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
916     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
917   } else if (vtype == BINARY_FILE_VIEWER) {
918     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
919   } else {
920     SETERRQ(1,1,"Viewer type not supported by PETSc object");
921   }
922   PetscFunctionReturn(0);
923 }
924 
925 /*
926     This has to provide several versions.
927 
928      2) a) use only local smoothing, updating outer values only once
929         b) local smoothing, updating outer values each inner iteration
930      3) coloring, updating outer values between colors
931 */
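
/*
   All the local sweeps below apply the standard SOR update to the locally
   owned rows: with d_i = fshift + a_ii (the shifted diagonal of the local
   block A), off-process values held frozen in lvec, and omega the
   relaxation factor, each visited row i is updated as

      x_i <- (1 - omega) x_i
             + omega * ( b_i - sum_{j != i} a_ij x_j - sum_k b_ik lvec_k ) / d_i

   (this description assumes SPARSEDENSEMDOT() subtracts the sparse row dot
   product from its first argument, which is how it is used here).
*/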
932 #undef __FUNC__
933 #define __FUNC__ "MatRelax_MPIAIJ"
934 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
935                            double fshift,int its,Vec xx)
936 {
937   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
938   Mat        AA = mat->A, BB = mat->B;
939   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
940   Scalar     *b,*x,*xs,*ls,d,*v,sum;
941   int        ierr,*idx, *diag;
942   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
943 
944   PetscFunctionBegin;
945   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
946   xs = x + shift; /* shift by one for index start of 1 */
947   ls = ls + shift;
948   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
949   diag = A->diag;
950   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
951     if (flag & SOR_ZERO_INITIAL_GUESS) {
952       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
953       PetscFunctionReturn(0);
954     }
955     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
956     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
957     while (its--) {
958       /* go down through the rows */
959       for ( i=0; i<m; i++ ) {
960         n    = A->i[i+1] - A->i[i];
961         idx  = A->j + A->i[i] + shift;
962         v    = A->a + A->i[i] + shift;
963         sum  = b[i];
964         SPARSEDENSEMDOT(sum,xs,v,idx,n);
965         d    = fshift + A->a[diag[i]+shift];
966         n    = B->i[i+1] - B->i[i];
967         idx  = B->j + B->i[i] + shift;
968         v    = B->a + B->i[i] + shift;
969         SPARSEDENSEMDOT(sum,ls,v,idx,n);
970         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
971       }
972       /* come up through the rows */
973       for ( i=m-1; i>-1; i-- ) {
974         n    = A->i[i+1] - A->i[i];
975         idx  = A->j + A->i[i] + shift;
976         v    = A->a + A->i[i] + shift;
977         sum  = b[i];
978         SPARSEDENSEMDOT(sum,xs,v,idx,n);
979         d    = fshift + A->a[diag[i]+shift];
980         n    = B->i[i+1] - B->i[i];
981         idx  = B->j + B->i[i] + shift;
982         v    = B->a + B->i[i] + shift;
983         SPARSEDENSEMDOT(sum,ls,v,idx,n);
984         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
985       }
986     }
987   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
988     if (flag & SOR_ZERO_INITIAL_GUESS) {
989       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
990       PetscFunctionReturn(0);
991     }
992     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
993     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
994     while (its--) {
995       for ( i=0; i<m; i++ ) {
996         n    = A->i[i+1] - A->i[i];
997         idx  = A->j + A->i[i] + shift;
998         v    = A->a + A->i[i] + shift;
999         sum  = b[i];
1000         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1001         d    = fshift + A->a[diag[i]+shift];
1002         n    = B->i[i+1] - B->i[i];
1003         idx  = B->j + B->i[i] + shift;
1004         v    = B->a + B->i[i] + shift;
1005         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1006         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1007       }
1008     }
1009   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1010     if (flag & SOR_ZERO_INITIAL_GUESS) {
1011       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1012       PetscFunctionReturn(0);
1013     }
1014     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1015                             mat->Mvctx); CHKERRQ(ierr);
1016     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1017                             mat->Mvctx); CHKERRQ(ierr);
1018     while (its--) {
1019       for ( i=m-1; i>-1; i-- ) {
1020         n    = A->i[i+1] - A->i[i];
1021         idx  = A->j + A->i[i] + shift;
1022         v    = A->a + A->i[i] + shift;
1023         sum  = b[i];
1024         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1025         d    = fshift + A->a[diag[i]+shift];
1026         n    = B->i[i+1] - B->i[i];
1027         idx  = B->j + B->i[i] + shift;
1028         v    = B->a + B->i[i] + shift;
1029         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1030         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1031       }
1032     }
1033   } else {
1034     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1035   }
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNC__
1040 #define __FUNC__ "MatGetInfo_MPIAIJ"
1041 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1042 {
1043   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1044   Mat        A = mat->A, B = mat->B;
1045   int        ierr;
1046   double     isend[5], irecv[5];
1047 
1048   PetscFunctionBegin;
1049   info->block_size     = 1.0;
1050   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1051   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1052   isend[3] = info->memory;  isend[4] = info->mallocs;
1053   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1054   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1055   isend[3] += info->memory;  isend[4] += info->mallocs;
1056   if (flag == MAT_LOCAL) {
1057     info->nz_used      = isend[0];
1058     info->nz_allocated = isend[1];
1059     info->nz_unneeded  = isend[2];
1060     info->memory       = isend[3];
1061     info->mallocs      = isend[4];
1062   } else if (flag == MAT_GLOBAL_MAX) {
1063     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1064     info->nz_used      = irecv[0];
1065     info->nz_allocated = irecv[1];
1066     info->nz_unneeded  = irecv[2];
1067     info->memory       = irecv[3];
1068     info->mallocs      = irecv[4];
1069   } else if (flag == MAT_GLOBAL_SUM) {
1070     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1071     info->nz_used      = irecv[0];
1072     info->nz_allocated = irecv[1];
1073     info->nz_unneeded  = irecv[2];
1074     info->memory       = irecv[3];
1075     info->mallocs      = irecv[4];
1076   }
1077   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1078   info->fill_ratio_needed = 0;
1079   info->factor_mallocs    = 0;
1080   info->rows_global       = (double)mat->M;
1081   info->columns_global    = (double)mat->N;
1082   info->rows_local        = (double)mat->m;
1083   info->columns_local     = (double)mat->N;
1084 
1085   PetscFunctionReturn(0);
1086 }
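
/*
   Usage sketch (illustrative only; C is assumed to be an assembled MPIAIJ
   matrix): query the nonzero count and memory use summed over all
   processes:

      MatInfo info;
      int     ierr;

      ierr = MatGetInfo(C,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"nz used %d, memory %d\n",
                  (int)info.nz_used,(int)info.memory);
*/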
1087 
1088 #undef __FUNC__
1089 #define __FUNC__ "MatSetOption_MPIAIJ"
1090 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1093 
1094   PetscFunctionBegin;
1095   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1096       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1097       op == MAT_COLUMNS_UNSORTED ||
1098       op == MAT_COLUMNS_SORTED ||
1099       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1100       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1101         MatSetOption(a->A,op);
1102         MatSetOption(a->B,op);
1103   } else if (op == MAT_ROW_ORIENTED) {
1104     a->roworiented = 1;
1105     MatSetOption(a->A,op);
1106     MatSetOption(a->B,op);
1107   } else if (op == MAT_ROWS_SORTED ||
1108              op == MAT_ROWS_UNSORTED ||
1109              op == MAT_SYMMETRIC ||
1110              op == MAT_STRUCTURALLY_SYMMETRIC ||
1111              op == MAT_YES_NEW_DIAGONALS)
1112     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1113   else if (op == MAT_COLUMN_ORIENTED) {
1114     a->roworiented = 0;
1115     MatSetOption(a->A,op);
1116     MatSetOption(a->B,op);
1117   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1118     a->donotstash = 1;
1119   } else if (op == MAT_NO_NEW_DIAGONALS){
1120     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1121   } else {
1122     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1123   }
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNC__
1128 #define __FUNC__ "MatGetSize_MPIAIJ"
1129 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1130 {
1131   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1132 
1133   PetscFunctionBegin;
1134   if (m) *m = mat->M;
1135   if (n) *n = mat->N;
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNC__
1140 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1141 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1142 {
1143   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1144 
1145   PetscFunctionBegin;
1146   if (m) *m = mat->m;
1147   if (n) *n = mat->n;
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 #undef __FUNC__
1152 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1153 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1154 {
1155   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1156 
1157   PetscFunctionBegin;
1158   *m = mat->rstart; *n = mat->rend;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1163 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1164 
1165 #undef __FUNC__
1166 #define __FUNC__ "MatGetRow_MPIAIJ"
1167 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1168 {
1169   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1170   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1171   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1172   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1173   int        *cmap, *idx_p;
1174 
1175   PetscFunctionBegin;
1176   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1177   mat->getrowactive = PETSC_TRUE;
1178 
1179   if (!mat->rowvalues && (idx || v)) {
1180     /*
1181         allocate enough space to hold information from the longest row.
1182     */
1183     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1184     int     max = 1,m = mat->m,tmp;
1185     for ( i=0; i<m; i++ ) {
1186       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1187       if (max < tmp) { max = tmp; }
1188     }
1189     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1190     CHKPTRQ(mat->rowvalues);
1191     mat->rowindices = (int *) (mat->rowvalues + max);
1192   }
1193 
1194   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1195   lrow = row - rstart;
1196 
1197   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1198   if (!v)   {pvA = 0; pvB = 0;}
1199   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1200   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1201   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1202   nztot = nzA + nzB;
1203 
1204   cmap  = mat->garray;
1205   if (v  || idx) {
1206     if (nztot) {
1207       /* Sort by increasing column numbers, assuming A and B already sorted */
1208       int imark = -1;
1209       if (v) {
1210         *v = v_p = mat->rowvalues;
1211         for ( i=0; i<nzB; i++ ) {
1212           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1213           else break;
1214         }
1215         imark = i;
1216         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1217         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1218       }
1219       if (idx) {
1220         *idx = idx_p = mat->rowindices;
1221         if (imark > -1) {
1222           for ( i=0; i<imark; i++ ) {
1223             idx_p[i] = cmap[cworkB[i]];
1224           }
1225         } else {
1226           for ( i=0; i<nzB; i++ ) {
1227             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1228             else break;
1229           }
1230           imark = i;
1231         }
1232         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1233         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1234       }
1235     }
1236     else {
1237       if (idx) *idx = 0;
1238       if (v)   *v   = 0;
1239     }
1240   }
1241   *nz = nztot;
1242   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1243   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
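
/*
   Usage sketch (illustrative only; C is assumed to be an assembled MPIAIJ
   matrix): MatGetRow() may only be called for locally owned rows, and each
   call must be paired with MatRestoreRow() before the next one, since only
   one row can be active at a time (see getrowactive above):

      int    ierr,row,nz,*cols,rstart,rend,i;
      Scalar *vals;

      ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(C,row,&nz,&cols,&vals);CHKERRQ(ierr);
        for ( i=0; i<nz; i++ ) { (use cols[i] and vals[i]) }
        ierr = MatRestoreRow(C,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/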
1246 
1247 #undef __FUNC__
1248 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1249 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1250 {
1251   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1252 
1253   PetscFunctionBegin;
1254   if (aij->getrowactive == PETSC_FALSE) {
1255     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1256   }
1257   aij->getrowactive = PETSC_FALSE;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNC__
1262 #define __FUNC__ "MatNorm_MPIAIJ"
1263 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1264 {
1265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1266   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1267   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1268   double     sum = 0.0;
1269   Scalar     *v;
1270 
1271   PetscFunctionBegin;
1272   if (aij->size == 1) {
1273     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1274   } else {
1275     if (type == NORM_FROBENIUS) {
1276       v = amat->a;
1277       for (i=0; i<amat->nz; i++ ) {
1278 #if defined(USE_PETSC_COMPLEX)
1279         sum += real(conj(*v)*(*v)); v++;
1280 #else
1281         sum += (*v)*(*v); v++;
1282 #endif
1283       }
1284       v = bmat->a;
1285       for (i=0; i<bmat->nz; i++ ) {
1286 #if defined(USE_PETSC_COMPLEX)
1287         sum += real(conj(*v)*(*v)); v++;
1288 #else
1289         sum += (*v)*(*v); v++;
1290 #endif
1291       }
1292       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1293       *norm = sqrt(*norm);
1294     } else if (type == NORM_1) { /* max column norm */
1295       double *tmp, *tmp2;
1296       int    *jj, *garray = aij->garray;
1297       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1298       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1299       PetscMemzero(tmp,aij->N*sizeof(double));
1300       *norm = 0.0;
1301       v = amat->a; jj = amat->j;
1302       for ( j=0; j<amat->nz; j++ ) {
1303         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1304       }
1305       v = bmat->a; jj = bmat->j;
1306       for ( j=0; j<bmat->nz; j++ ) {
1307         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1308       }
1309       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1310       for ( j=0; j<aij->N; j++ ) {
1311         if (tmp2[j] > *norm) *norm = tmp2[j];
1312       }
1313       PetscFree(tmp); PetscFree(tmp2);
1314     } else if (type == NORM_INFINITY) { /* max row norm */
1315       double ntemp = 0.0;
1316       for ( j=0; j<amat->m; j++ ) {
1317         v = amat->a + amat->i[j] + shift;
1318         sum = 0.0;
1319         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1320           sum += PetscAbsScalar(*v); v++;
1321         }
1322         v = bmat->a + bmat->i[j] + shift;
1323         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1324           sum += PetscAbsScalar(*v); v++;
1325         }
1326         if (sum > ntemp) ntemp = sum;
1327       }
1328       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1329     } else {
1330       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1331     }
1332   }
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNC__
1337 #define __FUNC__ "MatTranspose_MPIAIJ"
1338 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1339 {
1340   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1341   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1342   int        ierr,shift = Aloc->indexshift;
1343   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1344   Mat        B;
1345   Scalar     *array;
1346 
1347   PetscFunctionBegin;
1348   if (matout == PETSC_NULL && M != N) {
1349     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1350   }
1351 
1352   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1353 
1354   /* copy over the A part */
1355   Aloc = (Mat_SeqAIJ*) a->A->data;
1356   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1357   row = a->rstart;
1358   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1359   for ( i=0; i<m; i++ ) {
1360     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1361     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1362   }
1363   aj = Aloc->j;
1364   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1365 
1366   /* copy over the B part */
1367   Aloc = (Mat_SeqAIJ*) a->B->data;
1368   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1369   row = a->rstart;
1370   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1371   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1372   for ( i=0; i<m; i++ ) {
1373     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1374     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1375   }
1376   PetscFree(ct);
1377   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1378   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1379   if (matout != PETSC_NULL) {
1380     *matout = B;
1381   } else {
1382     PetscOps       *Abops;
1383     struct _MatOps *Aops;
1384 
1385     /* This isn't really an in-place transpose, but we do free the data structures from a */
1386     PetscFree(a->rowners);
1387     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1388     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1389     if (a->colmap) PetscFree(a->colmap);
1390     if (a->garray) PetscFree(a->garray);
1391     if (a->lvec) VecDestroy(a->lvec);
1392     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1393     PetscFree(a);
1394 
1395     /*
1396        This is horrible, horrible code. We need to keep the
1397       A pointers for the bops and ops but copy everything
1398       else from B.
1399     */
1400     Abops = A->bops;
1401     Aops  = A->ops;
1402     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1403     A->bops = Abops;
1404     A->ops  = Aops;
1405     PetscHeaderDestroy(B);
1406   }
1407   PetscFunctionReturn(0);
1408 }
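
/*
   Usage sketch (illustrative only; C is assumed to be an assembled MPIAIJ
   matrix): form an explicit transpose, or transpose a square matrix in
   place by passing PETSC_NULL for the result:

      Mat Ct;
      int ierr;

      ierr = MatTranspose(C,&Ct);CHKERRQ(ierr);        (new matrix Ct)
      ierr = MatTranspose(C,PETSC_NULL);CHKERRQ(ierr); (square matrices only)
*/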
1409 
1410 #undef __FUNC__
1411 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1412 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1413 {
1414   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1415   Mat a = aij->A, b = aij->B;
1416   int ierr,s1,s2,s3;
1417 
1418   PetscFunctionBegin;
1419   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1420   if (rr) {
1421     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1422     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1423     /* Overlap communication with computation. */
1424     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1425   }
1426   if (ll) {
1427     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1428     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1429     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1430   }
1431   /* scale the diagonal block with both ll and rr */
1432   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1433 
1434   if (rr) {
1435     /* Do a scatter end and then right scale the off-diagonal block */
1436     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1437     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1438   }
1439 
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 
1444 extern int MatPrintHelp_SeqAIJ(Mat);
1445 #undef __FUNC__
1446 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1447 int MatPrintHelp_MPIAIJ(Mat A)
1448 {
1449   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1450   int        ierr;
1451 
1452   PetscFunctionBegin;
1453   if (!a->rank) {
1454     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNC__
1460 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1461 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1462 {
1463   PetscFunctionBegin;
1464   *bs = 1;
1465   PetscFunctionReturn(0);
1466 }
1467 #undef __FUNC__
1468 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1469 int MatSetUnfactored_MPIAIJ(Mat A)
1470 {
1471   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1472   int        ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNC__
1480 #define __FUNC__ "MatEqual_MPIAIJ"
1481 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1482 {
1483   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1484   Mat        a, b, c, d;
1485   PetscTruth flg;
1486   int        ierr;
1487 
1488   PetscFunctionBegin;
1489   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1490   a = matA->A; b = matA->B;
1491   c = matB->A; d = matB->B;
1492 
1493   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1494   if (flg == PETSC_TRUE) {
1495     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1496   }
1497   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1502 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1503 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1504 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1505 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1506 
1507 /* -------------------------------------------------------------------*/
1508 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1509        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1510        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1511        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1512        0,0,
1513        0,0,
1514        0,0,
1515        MatRelax_MPIAIJ,
1516        MatTranspose_MPIAIJ,
1517        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1518        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1519        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1520        0,
1521        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1522        0,0,0,0,
1523        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1524        0,0,
1525        0,0,MatConvertSameType_MPIAIJ,0,0,
1526        0,0,0,
1527        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1528        MatPrintHelp_MPIAIJ,
1529        MatScale_MPIAIJ,0,0,0,
1530        MatGetBlockSize_MPIAIJ,0,0,0,0,
1531        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1532        0,0,MatGetSubMatrix_MPIAIJ};
1533 
1534 
1535 #undef __FUNC__
1536 #define __FUNC__ "MatCreateMPIAIJ"
1537 /*@C
1538    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1539    (the default parallel PETSc format).  For good matrix assembly performance
1540    the user should preallocate the matrix storage by setting the parameters
1541    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1542    performance can be increased by more than a factor of 50.
1543 
1544    Collective on MPI_Comm
1545 
1546    Input Parameters:
1547 +  comm - MPI communicator
1548 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1549            This value should be the same as the local size used in creating the
1550            y vector for the matrix-vector product y = Ax.
1551 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1552        This value should be the same as the local size used in creating the
1553        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1554 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1555 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1556 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1557            (same for all local rows)
1558 .  d_nnz - array containing the number of nonzeros in the various rows of the
1559            diagonal portion of the local submatrix (possibly different for each row)
1560            or PETSC_NULL. You must leave room for the diagonal entry even if
1561            it is zero.
1562 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1563            submatrix (same for all local rows).
1564 -  o_nnz - array containing the number of nonzeros in the various rows of the
1565            off-diagonal portion of the local submatrix (possibly different for
1566            each row) or PETSC_NULL.
1567 
1568    Output Parameter:
1569 .  A - the matrix
1570 
1571    Notes:
1572    The AIJ format (also called the Yale sparse matrix format or
1573    compressed row storage), is fully compatible with standard Fortran 77
1574    storage.  That is, the stored row and column indices can begin at
1575    either one (as in Fortran) or zero.  See the users manual for details.
1576 
1577    The user MUST specify either the local or global matrix dimensions
1578    (possibly both).
1579 
1580    By default, this format uses inodes (identical nodes) when possible.
1581    We search for consecutive rows with the same nonzero structure, thereby
1582    reusing matrix information to achieve increased efficiency.
1583 
1584    Options Database Keys:
1585 +  -mat_aij_no_inode  - Do not use inodes
1586 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1587 -  -mat_aij_oneindex - Internally use indexing starting at 1
1588         rather than 0.  Note that when calling MatSetValues(),
1589         the user still MUST index entries starting at 0!
1590 
1591    Storage Information:
1592    For a square global matrix we define each processor's diagonal portion
1593    to be its local rows and the corresponding columns (a square submatrix);
1594    each processor's off-diagonal portion encompasses the remainder of the
1595    local matrix (a rectangular submatrix).
1596 
1597    The user can specify preallocated storage for the diagonal part of
1598    the local submatrix with either d_nz or d_nnz (not both).  Set
1599    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1600    memory allocation.  Likewise, specify preallocated storage for the
1601    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1602 
1603    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1604    the figure below we depict these three local rows and all columns (0-11).
1605 
1606 .vb
1607              0 1 2 3 4 5 6 7 8 9 10 11
1608             -------------------
1609      row 3  |  o o o d d d o o o o o o
1610      row 4  |  o o o d d d o o o o o o
1611      row 5  |  o o o d d d o o o o o o
1612             -------------------
1613 .ve
1614 
1615    Thus, any entries in the d locations are stored in the d (diagonal)
1616    submatrix, and any entries in the o locations are stored in the
1617    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1618    stored simply in the MATSEQAIJ format for compressed row storage.
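   In the example above, each of the three local rows therefore has 3 entries
   stored in the d submatrix and 9 entries stored in the o submatrix.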
1619 
1620    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1621    and o_nz should indicate the number of nonzeros per row in the o matrix.
1622    In general, for PDE problems in which most nonzeros are near the diagonal,
1623    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1624    or you will get TERRIBLE performance; see the users' manual chapter on
1625    matrices.
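
   Example (a minimal sketch; the global sizes and per-row nonzero counts
   below are illustrative assumptions only, not recommendations):
.vb
      Mat A;
      int ierr, M = 100, N = 100;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
      /* set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
      ierr = MatDestroy(A); CHKERRQ(ierr);
.ve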
1626 
1627 .keywords: matrix, aij, compressed row, sparse, parallel
1628 
1629 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1630 @*/
1631 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1632 {
1633   Mat          B;
1634   Mat_MPIAIJ   *b;
1635   int          ierr, i,sum[2],work[2],size;
1636 
1637   PetscFunctionBegin;
1638   MPI_Comm_size(comm,&size);
1639   if (size == 1) {
1640     if (M == PETSC_DECIDE) M = m;
1641     if (N == PETSC_DECIDE) N = n;
1642     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1643     PetscFunctionReturn(0);
1644   }
1645 
1646   *A = 0;
1647   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1648   PLogObjectCreate(B);
1649   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1650   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1651   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1652   B->ops->destroy    = MatDestroy_MPIAIJ;
1653   B->ops->view       = MatView_MPIAIJ;
1654   B->factor     = 0;
1655   B->assembled  = PETSC_FALSE;
1656   B->mapping    = 0;
1657 
1658   B->insertmode = NOT_SET_VALUES;
1659   b->size       = size;
1660   MPI_Comm_rank(comm,&b->rank);
1661 
1662   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1663     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1664   }
1665 
1666   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1667     work[0] = m; work[1] = n;
1668     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1669     if (M == PETSC_DECIDE) M = sum[0];
1670     if (N == PETSC_DECIDE) N = sum[1];
1671   }
1672   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1673   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1674   b->m = m; B->m = m;
1675   b->n = n; B->n = n;
1676   b->N = N; B->N = N;
1677   b->M = M; B->M = M;
1678 
1679   /* build local table of row and column ownerships */
1680   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1681   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1682   b->cowners = b->rowners + b->size + 2;
1683   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1684   b->rowners[0] = 0;
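  /* convert the gathered local row counts into a running sum: rowners[i] becomes the first global row owned by process i */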
1685   for ( i=2; i<=b->size; i++ ) {
1686     b->rowners[i] += b->rowners[i-1];
1687   }
1688   b->rstart = b->rowners[b->rank];
1689   b->rend   = b->rowners[b->rank+1];
1690   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1691   b->cowners[0] = 0;
1692   for ( i=2; i<=b->size; i++ ) {
1693     b->cowners[i] += b->cowners[i-1];
1694   }
1695   b->cstart = b->cowners[b->rank];
1696   b->cend   = b->cowners[b->rank+1];
1697 
1698   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1699   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1700   PLogObjectParent(B,b->A);
1701   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1702   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1703   PLogObjectParent(B,b->B);
1704 
1705   /* build cache (stash) for entries destined for rows owned by other processes */
1706   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1707   b->donotstash  = 0;
1708   b->colmap      = 0;
1709   b->garray      = 0;
1710   b->roworiented = 1;
1711 
1712   /* stuff used for matrix vector multiply */
1713   b->lvec      = 0;
1714   b->Mvctx     = 0;
1715 
1716   /* stuff for MatGetRow() */
1717   b->rowindices   = 0;
1718   b->rowvalues    = 0;
1719   b->getrowactive = PETSC_FALSE;
1720 
1721   *A = B;
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNC__
1726 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1727 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1728 {
1729   Mat        mat;
1730   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1731   int        ierr, len=0, flg;
1732 
1733   PetscFunctionBegin;
1734   *newmat       = 0;
1735   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1736   PLogObjectCreate(mat);
1737   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1738   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1739   mat->ops->destroy    = MatDestroy_MPIAIJ;
1740   mat->ops->view       = MatView_MPIAIJ;
1741   mat->factor     = matin->factor;
1742   mat->assembled  = PETSC_TRUE;
1743 
1744   a->m = mat->m   = oldmat->m;
1745   a->n = mat->n   = oldmat->n;
1746   a->M = mat->M   = oldmat->M;
1747   a->N = mat->N   = oldmat->N;
1748 
1749   a->rstart       = oldmat->rstart;
1750   a->rend         = oldmat->rend;
1751   a->cstart       = oldmat->cstart;
1752   a->cend         = oldmat->cend;
1753   a->size         = oldmat->size;
1754   a->rank         = oldmat->rank;
1755   mat->insertmode = NOT_SET_VALUES;
1756   a->rowvalues    = 0;
1757   a->getrowactive = PETSC_FALSE;
1758 
1759   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1760   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1761   a->cowners = a->rowners + a->size + 2;
1762   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1763   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1764   if (oldmat->colmap) {
1765     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1766     PLogObjectMemory(mat,(a->N)*sizeof(int));
1767     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1768   } else a->colmap = 0;
1769   if (oldmat->garray) {
1770     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1771     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1772     PLogObjectMemory(mat,len*sizeof(int));
1773     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1774   } else a->garray = 0;
1775 
1776   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1777   PLogObjectParent(mat,a->lvec);
1778   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1779   PLogObjectParent(mat,a->Mvctx);
1780   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1781   PLogObjectParent(mat,a->A);
1782   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1783   PLogObjectParent(mat,a->B);
1784   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1785   if (flg) {
1786     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1787   }
1788   *newmat = mat;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 #include "sys.h"
1793 
1794 #undef __FUNC__
1795 #define __FUNC__ "MatLoad_MPIAIJ"
1796 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1797 {
1798   Mat          A;
1799   Scalar       *vals,*svals;
1800   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1801   MPI_Status   status;
1802   int          i, nz, ierr, j,rstart, rend, fd;
1803   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1804   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1805   int          tag = ((PetscObject)viewer)->tag;
1806 
1807   PetscFunctionBegin;
1808   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1809   if (!rank) {
1810     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1811     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1812     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1813     if (header[3] < 0) {
1814       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1815     }
1816   }
1817 
1818 
1819   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1820   M = header[1]; N = header[2];
1821   /* determine ownership of all rows */
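  /* each process gets M/size rows; the first M%size processes get one extra row */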
1822   m = M/size + ((M % size) > rank);
1823   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1824   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1825   rowners[0] = 0;
1826   for ( i=2; i<=size; i++ ) {
1827     rowners[i] += rowners[i-1];
1828   }
1829   rstart = rowners[rank];
1830   rend   = rowners[rank+1];
1831 
1832   /* distribute row lengths to all processors */
1833   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1834   offlens = ourlens + (rend-rstart);
1835   if (!rank) {
1836     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1837     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1838     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1839     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1840     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1841     PetscFree(sndcounts);
1842   } else {
1843     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1844   }
1845 
1846   if (!rank) {
1847     /* calculate the number of nonzeros on each processor */
1848     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1849     PetscMemzero(procsnz,size*sizeof(int));
1850     for ( i=0; i<size; i++ ) {
1851       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1852         procsnz[i] += rowlengths[j];
1853       }
1854     }
1855     PetscFree(rowlengths);
1856 
1857     /* determine max buffer needed and allocate it */
1858     maxnz = 0;
1859     for ( i=0; i<size; i++ ) {
1860       maxnz = PetscMax(maxnz,procsnz[i]);
1861     }
1862     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1863 
1864     /* read in my part of the matrix column indices  */
1865     nz     = procsnz[0];
1866     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1867     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1868 
1869     /* read in everyone else's column indices and ship them off */
1870     for ( i=1; i<size; i++ ) {
1871       nz   = procsnz[i];
1872       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1873       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1874     }
1875     PetscFree(cols);
1876   } else {
1877     /* determine buffer space needed for message */
1878     nz = 0;
1879     for ( i=0; i<m; i++ ) {
1880       nz += ourlens[i];
1881     }
1882     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1883 
1884     /* receive message of column indices */
1885     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1886     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1887     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1888   }
1889 
1890   /* loop over local rows, determining number of off diagonal entries */
1891   PetscMemzero(offlens,m*sizeof(int));
1892   jj = 0;
1893   for ( i=0; i<m; i++ ) {
1894     for ( j=0; j<ourlens[i]; j++ ) {
1895       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1896       jj++;
1897     }
1898   }
1899 
1900   /* create our matrix */
1901   for ( i=0; i<m; i++ ) {
1902     ourlens[i] -= offlens[i];
1903   }
1904   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1905   A = *newmat;
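  /* hint that each row's values will be passed with sorted column indices, allowing faster insertion */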
1906   MatSetOption(A,MAT_COLUMNS_SORTED);
1907   for ( i=0; i<m; i++ ) {
1908     ourlens[i] += offlens[i];
1909   }
1910 
1911   if (!rank) {
1912     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1913 
1914     /* read in my part of the matrix numerical values  */
1915     nz = procsnz[0];
1916     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1917 
1918     /* insert into matrix */
1919     jj      = rstart;
1920     smycols = mycols;
1921     svals   = vals;
1922     for ( i=0; i<m; i++ ) {
1923       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1924       smycols += ourlens[i];
1925       svals   += ourlens[i];
1926       jj++;
1927     }
1928 
1929     /* read in the other processes' values and ship them out */
1930     for ( i=1; i<size; i++ ) {
1931       nz   = procsnz[i];
1932       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1933       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1934     }
1935     PetscFree(procsnz);
1936   } else {
1937     /* receive numeric values */
1938     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1939 
1940     /* receive message of values */
1941     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1942     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1943     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1944 
1945     /* insert into matrix */
1946     jj      = rstart;
1947     smycols = mycols;
1948     svals   = vals;
1949     for ( i=0; i<m; i++ ) {
1950       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1951       smycols += ourlens[i];
1952       svals   += ourlens[i];
1953       jj++;
1954     }
1955   }
1956   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1957 
1958   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1959   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNC__
1964 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1965 /*
1966     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1967   matrix, and then the end result obtained by concatenating the local matrices.
1968   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1969 */
1970 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1971 {
1972   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1973   Mat        *local,M, Mreuse;
1974   Scalar     *vwork,*aa;
1975   MPI_Comm   comm = mat->comm;
1976   Mat_SeqAIJ *aij;
1977   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1978 
1979   PetscFunctionBegin;
1980   MPI_Comm_rank(comm,&rank);
1981   MPI_Comm_size(comm,&size);
1982 
1983   if (call ==  MAT_REUSE_MATRIX) {
1984     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1985     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
1986     local = &Mreuse;
1987     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1988   } else {
1989     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1990     Mreuse = *local;
1991     PetscFree(local);
1992   }
1993 
1994   /*
1995       m - number of local rows
1996       n - number of columns (same on all processors)
1997       rstart - first row in new global matrix generated
1998   */
1999   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2000   if (call == MAT_INITIAL_MATRIX) {
2001     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2002     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2003     ii  = aij->i;
2004     jj  = aij->j;
2005 
2006     /*
2007         Determine the number of non-zeros in the diagonal and off-diagonal
2008         portions of the matrix in order to do correct preallocation
2009     */
2010 
2011     /* first get start and end of "diagonal" columns */
2012     if (csize == PETSC_DECIDE) {
2013       nlocal = n/size + ((n % size) > rank);
2014     } else {
2015       nlocal = csize;
2016     }
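    /* running (prefix) sum of the local column counts over the processes: rend is one past the last "diagonal" column owned locally */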
2017     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2018     rstart = rend - nlocal;
2019     if (rank == size - 1 && rend != n) {
2020       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2021     }
2022 
2023     /* next, compute all the lengths */
2024     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2025     olens = dlens + m;
2026     for ( i=0; i<m; i++ ) {
2027       jend = ii[i+1] - ii[i];
2028       olen = 0;
2029       dlen = 0;
2030       for ( j=0; j<jend; j++ ) {
2031         if ( *jj < rstart || *jj >= rend) olen++;
2032         else dlen++;
2033         jj++;
2034       }
2035       olens[i] = olen;
2036       dlens[i] = dlen;
2037     }
2038     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2039     PetscFree(dlens);
2040   } else {
2041     int ml,nl;
2042 
2043     M = *newmat;
2044     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2045     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2046     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2047     /*
2048          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2049        rather than the slower MatSetValues().
2050     */
2051     M->was_assembled = PETSC_TRUE;
2052     M->assembled     = PETSC_FALSE;
2053   }
2054   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2055   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2056   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2057   ii  = aij->i;
2058   jj  = aij->j;
2059   aa  = aij->a;
2060   for (i=0; i<m; i++) {
2061     row   = rstart + i;
2062     nz    = ii[i+1] - ii[i];
2063     cwork = jj;     jj += nz;
2064     vwork = aa;     aa += nz;
2065     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2066   }
2067 
2068   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2070   *newmat = M;
2071 
2072   /* save the sequential submatrix computed on this process for a later MAT_REUSE_MATRIX request */
2073   if (call ==  MAT_INITIAL_MATRIX) {
2074     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2075     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2076   }
2077 
2078   PetscFunctionReturn(0);
2079 }
2080