xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6eb55b6ad38d1c8da43f76fa76928da417b92a79)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.240 1998/04/27 03:54:07 curfman Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non scable way since the
14 length of colmap equals the global matrix length.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col) high = t; \
46       else             low  = t; \
47     } \
48       for ( _i=0; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col) high = t; \
117       else             low  = t; \
118     } \
119        for ( _i=0; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some Variables required in the macro */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     } else {
294       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
295     }
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNC__
301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
303 {
304   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
305   MPI_Comm    comm = mat->comm;
306   int         size = aij->size, *owners = aij->rowners;
307   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
308   MPI_Request *send_waits,*recv_waits;
309   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
310   InsertMode  addv;
311   Scalar      *rvalues,*svalues;
312 
313   PetscFunctionBegin;
314   /* make sure all processors are either in INSERTMODE or ADDMODE */
315   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
316   if (addv == (ADD_VALUES|INSERT_VALUES)) {
317     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
318   }
319   mat->insertmode = addv; /* in case this processor had no cache */
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
325   for ( i=0; i<aij->stash.n; i++ ) {
326     idx = aij->stash.idx[i];
327     for ( j=0; j<size; j++ ) {
328       if (idx >= owners[j] && idx < owners[j+1]) {
329         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
330       }
331     }
332   }
333   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
334 
335   /* inform other processors of number of messages and max length*/
336   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
337   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
338   nreceives = work[rank];
339   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
340   nmax = work[rank];
341   PetscFree(work);
342 
343   /* post receives:
344        1) each message will consist of ordered pairs
345      (global index,value) we store the global index as a double
346      to simplify the message passing.
347        2) since we don't know how long each individual message is we
348      allocate the largest needed buffer for each receive. Potentially
349      this is a lot of wasted space.
350 
351 
352        This could be done better.
353   */
354   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
355   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
356   for ( i=0; i<nreceives; i++ ) {
357     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
358               comm,recv_waits+i);CHKERRQ(ierr);
359   }
360 
361   /* do sends:
362       1) starts[i] gives the starting index in svalues for stuff going to
363          the ith processor
364   */
365   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
366   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
367   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
368   starts[0]  = 0;
369   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
370   for ( i=0; i<aij->stash.n; i++ ) {
371     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
372     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
373     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
374   }
375   PetscFree(owner);
376   starts[0] = 0;
377   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   count = 0;
379   for ( i=0; i<size; i++ ) {
380     if (procs[i]) {
381       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
382     }
383   }
384   PetscFree(starts); PetscFree(nprocs);
385 
386   /* Free cache space */
387   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
388   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
389 
390   aij->svalues    = svalues;    aij->rvalues    = rvalues;
391   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
392   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
393   aij->rmax       = nmax;
394 
395   PetscFunctionReturn(0);
396 }
397 extern int MatSetUpMultiply_MPIAIJ(Mat);
398 
399 #undef __FUNC__
400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
402 {
403   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
404   MPI_Status  *send_status,recv_status;
405   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
406   int         row,col,other_disassembled;
407   Scalar      *values,val;
408   InsertMode  addv = mat->insertmode;
409 
410   PetscFunctionBegin;
411   /*  wait on receives */
412   while (count) {
413     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
414     /* unpack receives into our local space */
415     values = aij->rvalues + 3*imdex*aij->rmax;
416     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
417     n = n/3;
418     for ( i=0; i<n; i++ ) {
419       row = (int) PetscReal(values[3*i]) - aij->rstart;
420       col = (int) PetscReal(values[3*i+1]);
421       val = values[3*i+2];
422       if (col >= aij->cstart && col < aij->cend) {
423         col -= aij->cstart;
424         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
425       } else {
426         if (mat->was_assembled || mat->assembled) {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
429           }
430           col = aij->colmap[col] - 1;
431           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
432             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
433             col = (int) PetscReal(values[3*i+1]);
434           }
435         }
436         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
437       }
438     }
439     count--;
440   }
441   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
442 
443   /* wait on sends */
444   if (aij->nsends) {
445     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
446     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
447     PetscFree(send_status);
448   }
449   PetscFree(aij->send_waits); PetscFree(aij->svalues);
450 
451   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
452   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
453 
454   /* determine if any processor has disassembled, if so we must
455      also disassemble ourselfs, in order that we may reassemble. */
456   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
457   if (mat->was_assembled && !other_disassembled) {
458     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
459   }
460 
461   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
462     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
463   }
464   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
465   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
466 
467   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatZeroEntries_MPIAIJ"
473 int MatZeroEntries_MPIAIJ(Mat A)
474 {
475   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
476   int        ierr;
477 
478   PetscFunctionBegin;
479   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
480   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNC__
485 #define __FUNC__ "MatZeroRows_MPIAIJ"
486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
487 {
488   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
489   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
490   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
491   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
492   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
493   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
494   MPI_Comm       comm = A->comm;
495   MPI_Request    *send_waits,*recv_waits;
496   MPI_Status     recv_status,*send_status;
497   IS             istmp;
498   PetscTruth     localdiag;
499 
500   PetscFunctionBegin;
501   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
502   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
503 
504   /*  first count number of contributors to each processor */
505   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
506   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
507   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
508   for ( i=0; i<N; i++ ) {
509     idx = rows[i];
510     found = 0;
511     for ( j=0; j<size; j++ ) {
512       if (idx >= owners[j] && idx < owners[j+1]) {
513         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
514       }
515     }
516     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
517   }
518   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
519 
520   /* inform other processors of number of messages and max length*/
521   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
522   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
523   nrecvs = work[rank];
524   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
525   nmax = work[rank];
526   PetscFree(work);
527 
528   /* post receives:   */
529   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
530   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
531   for ( i=0; i<nrecvs; i++ ) {
532     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
533   }
534 
535   /* do sends:
536       1) starts[i] gives the starting index in svalues for stuff going to
537          the ith processor
538   */
539   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
540   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
541   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
542   starts[0] = 0;
543   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
544   for ( i=0; i<N; i++ ) {
545     svalues[starts[owner[i]]++] = rows[i];
546   }
547   ISRestoreIndices(is,&rows);
548 
549   starts[0] = 0;
550   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
551   count = 0;
552   for ( i=0; i<size; i++ ) {
553     if (procs[i]) {
554       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
555     }
556   }
557   PetscFree(starts);
558 
559   base = owners[rank];
560 
561   /*  wait on receives */
562   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
563   source = lens + nrecvs;
564   count  = nrecvs; slen = 0;
565   while (count) {
566     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
567     /* unpack receives into our local space */
568     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
569     source[imdex]  = recv_status.MPI_SOURCE;
570     lens[imdex]  = n;
571     slen += n;
572     count--;
573   }
574   PetscFree(recv_waits);
575 
576   /* move the data into the send scatter */
577   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
578   count = 0;
579   for ( i=0; i<nrecvs; i++ ) {
580     values = rvalues + i*nmax;
581     for ( j=0; j<lens[i]; j++ ) {
582       lrows[count++] = values[j] - base;
583     }
584   }
585   PetscFree(rvalues); PetscFree(lens);
586   PetscFree(owner); PetscFree(nprocs);
587 
588   /* actually zap the local rows */
589   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
590   PLogObjectParent(A,istmp);
591 
592   /*
593         Zero the required rows. If the "diagonal block" of the matrix
594      is square and the user wishes to set the diagonal we use seperate
595      code so that MatSetValues() is not called for each diagonal allocating
596      new memory, thus calling lots of mallocs and slowing things down.
597 
598        Contributed by: Mathew Knepley
599   */
600   localdiag = PETSC_FALSE;
601   if (diag && (l->A->M == l->A->N)) {
602     localdiag = PETSC_TRUE;
603     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
604   } else {
605     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
606   }
607   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
608   ierr = ISDestroy(istmp); CHKERRQ(ierr);
609 
610   if (diag && (localdiag == PETSC_FALSE)) {
611     for ( i = 0; i < slen; i++ ) {
612       row = lrows[i] + rstart;
613       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
614     }
615     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
616     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
617   }
618 
619   ierr = ISDestroy(istmp); CHKERRQ(ierr);
620 
621   if (diag) {
622     for ( i = 0; i < slen; i++ ) {
623       row = lrows[i] + rstart;
624       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
625     }
626     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
627     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
628   }
629   PetscFree(lrows);
630  /* wait on sends */
631   if (nsends) {
632     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
633     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
634     PetscFree(send_status);
635   }
636   PetscFree(send_waits); PetscFree(svalues);
637 
638   PetscFunctionReturn(0);
639 }
640 
641 #undef __FUNC__
642 #define __FUNC__ "MatMult_MPIAIJ"
643 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
644 {
645   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
646   int        ierr,nt;
647 
648   PetscFunctionBegin;
649   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
650   if (nt != a->n) {
651     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
652   }
653   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
654   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
655   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
656   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 #undef __FUNC__
661 #define __FUNC__ "MatMultAdd_MPIAIJ"
662 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
663 {
664   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
665   int        ierr;
666 
667   PetscFunctionBegin;
668   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
669   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
670   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
671   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNC__
676 #define __FUNC__ "MatMultTrans_MPIAIJ"
677 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   /* do nondiagonal part */
684   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
685   /* send it on its way */
686   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
687   /* do local part */
688   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
689   /* receive remote parts: note this assumes the values are not actually */
690   /* inserted in yy until the next line, which is true for my implementation*/
691   /* but is not perhaps always true. */
692   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNC__
697 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
698 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
699 {
700   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
701   int        ierr;
702 
703   PetscFunctionBegin;
704   /* do nondiagonal part */
705   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
706   /* send it on its way */
707   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
708   /* do local part */
709   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
710   /* receive remote parts: note this assumes the values are not actually */
711   /* inserted in yy until the next line, which is true for my implementation*/
712   /* but is not perhaps always true. */
713   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 /*
718   This only works correctly for square matrices where the subblock A->A is the
719    diagonal block
720 */
721 #undef __FUNC__
722 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
723 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
724 {
725   int        ierr;
726   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
727 
728   PetscFunctionBegin;
729   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
730   if (a->rstart != a->cstart || a->rend != a->cend) {
731     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
732   }
733   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNC__
738 #define __FUNC__ "MatScale_MPIAIJ"
739 int MatScale_MPIAIJ(Scalar *aa,Mat A)
740 {
741   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
742   int        ierr;
743 
744   PetscFunctionBegin;
745   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
746   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNC__
751 #define __FUNC__ "MatDestroy_MPIAIJ"
752 int MatDestroy_MPIAIJ(Mat mat)
753 {
754   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
755   int        ierr;
756 
757   PetscFunctionBegin;
758 #if defined(USE_PETSC_LOG)
759   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
760 #endif
761   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
762   PetscFree(aij->rowners);
763   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
764   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
765   if (aij->colmap) PetscFree(aij->colmap);
766   if (aij->garray) PetscFree(aij->garray);
767   if (aij->lvec)   VecDestroy(aij->lvec);
768   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
769   if (aij->rowvalues) PetscFree(aij->rowvalues);
770   PetscFree(aij);
771   PLogObjectDestroy(mat);
772   PetscHeaderDestroy(mat);
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNC__
777 #define __FUNC__ "MatView_MPIAIJ_Binary"
778 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
779 {
780   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
781   int         ierr;
782 
783   PetscFunctionBegin;
784   if (aij->size == 1) {
785     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
786   }
787   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
788   PetscFunctionReturn(0);
789 }
790 
791 #undef __FUNC__
792 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
793 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
794 {
795   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
796   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
797   int         ierr, format,shift = C->indexshift,rank;
798   FILE        *fd;
799   ViewerType  vtype;
800 
801   PetscFunctionBegin;
802   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
803   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
804     ierr = ViewerGetFormat(viewer,&format);
805     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
806       MatInfo info;
807       int     flg;
808       MPI_Comm_rank(mat->comm,&rank);
809       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
810       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
811       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
812       PetscSequentialPhaseBegin(mat->comm,1);
813       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
814          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
815       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
816          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
817       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
818       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
819       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
820       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
821       fflush(fd);
822       PetscSequentialPhaseEnd(mat->comm,1);
823       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
824       PetscFunctionReturn(0);
825     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
826       PetscFunctionReturn(0);
827     }
828   }
829 
830   if (vtype == DRAW_VIEWER) {
831     Draw       draw;
832     PetscTruth isnull;
833     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
834     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
835   }
836 
837   if (vtype == ASCII_FILE_VIEWER) {
838     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
839     PetscSequentialPhaseBegin(mat->comm,1);
840     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
841            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
842            aij->cend);
843     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
844     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
845     fflush(fd);
846     PetscSequentialPhaseEnd(mat->comm,1);
847   } else {
848     int size = aij->size;
849     rank = aij->rank;
850     if (size == 1) {
851       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
852     } else {
853       /* assemble the entire matrix onto first processor. */
854       Mat         A;
855       Mat_SeqAIJ *Aloc;
856       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
857       Scalar      *a;
858 
859       if (!rank) {
860         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
861       } else {
862         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
863       }
864       PLogObjectParent(mat,A);
865 
866       /* copy over the A part */
867       Aloc = (Mat_SeqAIJ*) aij->A->data;
868       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
869       row = aij->rstart;
870       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
871       for ( i=0; i<m; i++ ) {
872         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
873         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
874       }
875       aj = Aloc->j;
876       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
877 
878       /* copy over the B part */
879       Aloc = (Mat_SeqAIJ*) aij->B->data;
880       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
881       row = aij->rstart;
882       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
883       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
884       for ( i=0; i<m; i++ ) {
885         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
886         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
887       }
888       PetscFree(ct);
889       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
890       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
891       /*
892          Everyone has to call to draw the matrix since the graphics waits are
893          synchronized across all processors that share the Draw object
894       */
895       if (!rank || vtype == DRAW_VIEWER) {
896         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
897       }
898       ierr = MatDestroy(A); CHKERRQ(ierr);
899     }
900   }
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNC__
905 #define __FUNC__ "MatView_MPIAIJ"
906 int MatView_MPIAIJ(Mat mat,Viewer viewer)
907 {
908   int         ierr;
909   ViewerType  vtype;
910 
911   PetscFunctionBegin;
912   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
913   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
914       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
915     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
916   } else if (vtype == BINARY_FILE_VIEWER) {
917     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
918   } else {
919     SETERRQ(1,1,"Viewer type not supported by PETSc object");
920   }
921   PetscFunctionReturn(0);
922 }
923 
924 /*
925     This has to provide several versions.
926 
927      2) a) use only local smoothing updating outer values only once.
928         b) local smoothing updating outer values each inner iteration
929      3) color updating out values betwen colors.
930 */
931 #undef __FUNC__
932 #define __FUNC__ "MatRelax_MPIAIJ"
933 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
934                            double fshift,int its,Vec xx)
935 {
936   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
937   Mat        AA = mat->A, BB = mat->B;
938   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
939   Scalar     *b,*x,*xs,*ls,d,*v,sum;
940   int        ierr,*idx, *diag;
941   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
942 
943   PetscFunctionBegin;
944   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
945   xs = x + shift; /* shift by one for index start of 1 */
946   ls = ls + shift;
947   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
948   diag = A->diag;
949   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
950     if (flag & SOR_ZERO_INITIAL_GUESS) {
951       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
952       PetscFunctionReturn(0);
953     }
954     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
955     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
956     while (its--) {
957       /* go down through the rows */
958       for ( i=0; i<m; i++ ) {
959         n    = A->i[i+1] - A->i[i];
960         idx  = A->j + A->i[i] + shift;
961         v    = A->a + A->i[i] + shift;
962         sum  = b[i];
963         SPARSEDENSEMDOT(sum,xs,v,idx,n);
964         d    = fshift + A->a[diag[i]+shift];
965         n    = B->i[i+1] - B->i[i];
966         idx  = B->j + B->i[i] + shift;
967         v    = B->a + B->i[i] + shift;
968         SPARSEDENSEMDOT(sum,ls,v,idx,n);
969         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
970       }
971       /* come up through the rows */
972       for ( i=m-1; i>-1; i-- ) {
973         n    = A->i[i+1] - A->i[i];
974         idx  = A->j + A->i[i] + shift;
975         v    = A->a + A->i[i] + shift;
976         sum  = b[i];
977         SPARSEDENSEMDOT(sum,xs,v,idx,n);
978         d    = fshift + A->a[diag[i]+shift];
979         n    = B->i[i+1] - B->i[i];
980         idx  = B->j + B->i[i] + shift;
981         v    = B->a + B->i[i] + shift;
982         SPARSEDENSEMDOT(sum,ls,v,idx,n);
983         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
984       }
985     }
986   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
987     if (flag & SOR_ZERO_INITIAL_GUESS) {
988       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
989       PetscFunctionReturn(0);
990     }
991     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
992     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
993     while (its--) {
994       for ( i=0; i<m; i++ ) {
995         n    = A->i[i+1] - A->i[i];
996         idx  = A->j + A->i[i] + shift;
997         v    = A->a + A->i[i] + shift;
998         sum  = b[i];
999         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1000         d    = fshift + A->a[diag[i]+shift];
1001         n    = B->i[i+1] - B->i[i];
1002         idx  = B->j + B->i[i] + shift;
1003         v    = B->a + B->i[i] + shift;
1004         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1005         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1006       }
1007     }
1008   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1009     if (flag & SOR_ZERO_INITIAL_GUESS) {
1010       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1011       PetscFunctionReturn(0);
1012     }
1013     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1014                             mat->Mvctx); CHKERRQ(ierr);
1015     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1016                             mat->Mvctx); CHKERRQ(ierr);
1017     while (its--) {
1018       for ( i=m-1; i>-1; i-- ) {
1019         n    = A->i[i+1] - A->i[i];
1020         idx  = A->j + A->i[i] + shift;
1021         v    = A->a + A->i[i] + shift;
1022         sum  = b[i];
1023         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1024         d    = fshift + A->a[diag[i]+shift];
1025         n    = B->i[i+1] - B->i[i];
1026         idx  = B->j + B->i[i] + shift;
1027         v    = B->a + B->i[i] + shift;
1028         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1029         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1030       }
1031     }
1032   } else {
1033     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1034   }
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNC__
1039 #define __FUNC__ "MatGetInfo_MPIAIJ"
1040 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1041 {
1042   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1043   Mat        A = mat->A, B = mat->B;
1044   int        ierr;
1045   double     isend[5], irecv[5];
1046 
1047   PetscFunctionBegin;
1048   info->block_size     = 1.0;
1049   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1050   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1051   isend[3] = info->memory;  isend[4] = info->mallocs;
1052   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1053   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1054   isend[3] += info->memory;  isend[4] += info->mallocs;
1055   if (flag == MAT_LOCAL) {
1056     info->nz_used      = isend[0];
1057     info->nz_allocated = isend[1];
1058     info->nz_unneeded  = isend[2];
1059     info->memory       = isend[3];
1060     info->mallocs      = isend[4];
1061   } else if (flag == MAT_GLOBAL_MAX) {
1062     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1063     info->nz_used      = irecv[0];
1064     info->nz_allocated = irecv[1];
1065     info->nz_unneeded  = irecv[2];
1066     info->memory       = irecv[3];
1067     info->mallocs      = irecv[4];
1068   } else if (flag == MAT_GLOBAL_SUM) {
1069     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1070     info->nz_used      = irecv[0];
1071     info->nz_allocated = irecv[1];
1072     info->nz_unneeded  = irecv[2];
1073     info->memory       = irecv[3];
1074     info->mallocs      = irecv[4];
1075   }
1076   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1077   info->fill_ratio_needed = 0;
1078   info->factor_mallocs    = 0;
1079   info->rows_global       = (double)mat->M;
1080   info->columns_global    = (double)mat->N;
1081   info->rows_local        = (double)mat->m;
1082   info->columns_local     = (double)mat->N;
1083 
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNC__
1088 #define __FUNC__ "MatSetOption_MPIAIJ"
1089 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1092 
1093   PetscFunctionBegin;
1094   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1095       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1096       op == MAT_COLUMNS_UNSORTED ||
1097       op == MAT_COLUMNS_SORTED ||
1098       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1099       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1100         MatSetOption(a->A,op);
1101         MatSetOption(a->B,op);
1102   } else if (op == MAT_ROW_ORIENTED) {
1103     a->roworiented = 1;
1104     MatSetOption(a->A,op);
1105     MatSetOption(a->B,op);
1106   } else if (op == MAT_ROWS_SORTED ||
1107              op == MAT_ROWS_UNSORTED ||
1108              op == MAT_SYMMETRIC ||
1109              op == MAT_STRUCTURALLY_SYMMETRIC ||
1110              op == MAT_YES_NEW_DIAGONALS)
1111     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1112   else if (op == MAT_COLUMN_ORIENTED) {
1113     a->roworiented = 0;
1114     MatSetOption(a->A,op);
1115     MatSetOption(a->B,op);
1116   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1117     a->donotstash = 1;
1118   } else if (op == MAT_NO_NEW_DIAGONALS){
1119     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1120   } else {
1121     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1122   }
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNC__
1127 #define __FUNC__ "MatGetSize_MPIAIJ"
1128 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1129 {
1130   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1131 
1132   PetscFunctionBegin;
1133   if (m) *m = mat->M;
1134   if (n) *n = mat->N;
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNC__
1139 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1140 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1141 {
1142   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1143 
1144   PetscFunctionBegin;
1145   if (m) *m = mat->m;
1146   if (n) *n = mat->n;
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNC__
1151 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1152 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1153 {
1154   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1155 
1156   PetscFunctionBegin;
1157   *m = mat->rstart; *n = mat->rend;
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1162 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1163 
1164 #undef __FUNC__
1165 #define __FUNC__ "MatGetRow_MPIAIJ"
1166 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1167 {
1168   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1169   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1170   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1171   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1172   int        *cmap, *idx_p;
1173 
1174   PetscFunctionBegin;
1175   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1176   mat->getrowactive = PETSC_TRUE;
1177 
1178   if (!mat->rowvalues && (idx || v)) {
1179     /*
1180         allocate enough space to hold information from the longest row.
1181     */
1182     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1183     int     max = 1,m = mat->m,tmp;
1184     for ( i=0; i<m; i++ ) {
1185       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1186       if (max < tmp) { max = tmp; }
1187     }
1188     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1189     CHKPTRQ(mat->rowvalues);
1190     mat->rowindices = (int *) (mat->rowvalues + max);
1191   }
1192 
1193   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1194   lrow = row - rstart;
1195 
1196   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1197   if (!v)   {pvA = 0; pvB = 0;}
1198   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1199   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1200   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1201   nztot = nzA + nzB;
1202 
1203   cmap  = mat->garray;
1204   if (v  || idx) {
1205     if (nztot) {
1206       /* Sort by increasing column numbers, assuming A and B already sorted */
1207       int imark = -1;
1208       if (v) {
1209         *v = v_p = mat->rowvalues;
1210         for ( i=0; i<nzB; i++ ) {
1211           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1212           else break;
1213         }
1214         imark = i;
1215         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1216         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1217       }
1218       if (idx) {
1219         *idx = idx_p = mat->rowindices;
1220         if (imark > -1) {
1221           for ( i=0; i<imark; i++ ) {
1222             idx_p[i] = cmap[cworkB[i]];
1223           }
1224         } else {
1225           for ( i=0; i<nzB; i++ ) {
1226             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1227             else break;
1228           }
1229           imark = i;
1230         }
1231         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1232         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1233       }
1234     }
1235     else {
1236       if (idx) *idx = 0;
1237       if (v)   *v   = 0;
1238     }
1239   }
1240   *nz = nztot;
1241   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1242   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNC__
1247 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1248 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1249 {
1250   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1251 
1252   PetscFunctionBegin;
1253   if (aij->getrowactive == PETSC_FALSE) {
1254     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1255   }
1256   aij->getrowactive = PETSC_FALSE;
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNC__
1261 #define __FUNC__ "MatNorm_MPIAIJ"
1262 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1263 {
1264   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1265   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1266   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1267   double     sum = 0.0;
1268   Scalar     *v;
1269 
1270   PetscFunctionBegin;
1271   if (aij->size == 1) {
1272     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1273   } else {
1274     if (type == NORM_FROBENIUS) {
1275       v = amat->a;
1276       for (i=0; i<amat->nz; i++ ) {
1277 #if defined(USE_PETSC_COMPLEX)
1278         sum += real(conj(*v)*(*v)); v++;
1279 #else
1280         sum += (*v)*(*v); v++;
1281 #endif
1282       }
1283       v = bmat->a;
1284       for (i=0; i<bmat->nz; i++ ) {
1285 #if defined(USE_PETSC_COMPLEX)
1286         sum += real(conj(*v)*(*v)); v++;
1287 #else
1288         sum += (*v)*(*v); v++;
1289 #endif
1290       }
1291       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1292       *norm = sqrt(*norm);
1293     } else if (type == NORM_1) { /* max column norm */
1294       double *tmp, *tmp2;
1295       int    *jj, *garray = aij->garray;
1296       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1297       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1298       PetscMemzero(tmp,aij->N*sizeof(double));
1299       *norm = 0.0;
1300       v = amat->a; jj = amat->j;
1301       for ( j=0; j<amat->nz; j++ ) {
1302         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1303       }
1304       v = bmat->a; jj = bmat->j;
1305       for ( j=0; j<bmat->nz; j++ ) {
1306         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1307       }
1308       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1309       for ( j=0; j<aij->N; j++ ) {
1310         if (tmp2[j] > *norm) *norm = tmp2[j];
1311       }
1312       PetscFree(tmp); PetscFree(tmp2);
1313     } else if (type == NORM_INFINITY) { /* max row norm */
1314       double ntemp = 0.0;
1315       for ( j=0; j<amat->m; j++ ) {
1316         v = amat->a + amat->i[j] + shift;
1317         sum = 0.0;
1318         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1319           sum += PetscAbsScalar(*v); v++;
1320         }
1321         v = bmat->a + bmat->i[j] + shift;
1322         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1323           sum += PetscAbsScalar(*v); v++;
1324         }
1325         if (sum > ntemp) ntemp = sum;
1326       }
1327       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1328     } else {
1329       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1330     }
1331   }
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNC__
1336 #define __FUNC__ "MatTranspose_MPIAIJ"
1337 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1338 {
1339   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1340   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1341   int        ierr,shift = Aloc->indexshift;
1342   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1343   Mat        B;
1344   Scalar     *array;
1345 
1346   PetscFunctionBegin;
1347   if (matout == PETSC_NULL && M != N) {
1348     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1349   }
1350 
1351   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1352 
1353   /* copy over the A part */
1354   Aloc = (Mat_SeqAIJ*) a->A->data;
1355   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1356   row = a->rstart;
1357   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1358   for ( i=0; i<m; i++ ) {
1359     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1360     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1361   }
1362   aj = Aloc->j;
1363   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1364 
1365   /* copy over the B part */
1366   Aloc = (Mat_SeqAIJ*) a->B->data;
1367   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1368   row = a->rstart;
1369   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1370   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1371   for ( i=0; i<m; i++ ) {
1372     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1373     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1374   }
1375   PetscFree(ct);
1376   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1377   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1378   if (matout != PETSC_NULL) {
1379     *matout = B;
1380   } else {
1381     PetscOps       *Abops;
1382     struct _MatOps *Aops;
1383 
1384     /* This isn't really an in-place transpose .... but free data structures from a */
1385     PetscFree(a->rowners);
1386     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1387     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1388     if (a->colmap) PetscFree(a->colmap);
1389     if (a->garray) PetscFree(a->garray);
1390     if (a->lvec) VecDestroy(a->lvec);
1391     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1392     PetscFree(a);
1393 
1394     /*
1395        This is horrible, horrible code. We need to keep the
1396       A pointers for the bops and ops but copy everything
1397       else from C.
1398     */
1399     Abops = A->bops;
1400     Aops  = A->ops;
1401     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1402     A->bops = Abops;
1403     A->ops  = Aops;
1404     PetscHeaderDestroy(B);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNC__
1410 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1411 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1412 {
1413   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1414   Mat a = aij->A, b = aij->B;
1415   int ierr,s1,s2,s3;
1416 
1417   PetscFunctionBegin;
1418   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1419   if (rr) {
1420     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1421     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1422     /* Overlap communication with computation. */
1423     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1424   }
1425   if (ll) {
1426     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1427     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1428     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1429   }
1430   /* scale  the diagonal block */
1431   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1432 
1433   if (rr) {
1434     /* Do a scatter end and then right scale the off-diagonal block */
1435     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1436     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1437   }
1438 
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 
1443 extern int MatPrintHelp_SeqAIJ(Mat);
1444 #undef __FUNC__
1445 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1446 int MatPrintHelp_MPIAIJ(Mat A)
1447 {
1448   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1449   int        ierr;
1450 
1451   PetscFunctionBegin;
1452   if (!a->rank) {
1453     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNC__
1459 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1460 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1461 {
1462   PetscFunctionBegin;
1463   *bs = 1;
1464   PetscFunctionReturn(0);
1465 }
1466 #undef __FUNC__
1467 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1468 int MatSetUnfactored_MPIAIJ(Mat A)
1469 {
1470   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNC__
1479 #define __FUNC__ "MatEqual_MPIAIJ"
1480 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1481 {
1482   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1483   Mat        a, b, c, d;
1484   PetscTruth flg;
1485   int        ierr;
1486 
1487   PetscFunctionBegin;
1488   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1489   a = matA->A; b = matA->B;
1490   c = matB->A; d = matB->B;
1491 
1492   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1493   if (flg == PETSC_TRUE) {
1494     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1495   }
1496   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1501 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1502 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1503 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1504 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1505 
1506 /* -------------------------------------------------------------------*/
1507 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1508        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1509        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1510        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1511        0,0,
1512        0,0,
1513        0,0,
1514        MatRelax_MPIAIJ,
1515        MatTranspose_MPIAIJ,
1516        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1517        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1518        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1519        0,
1520        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1521        0,0,0,0,
1522        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1523        0,0,
1524        0,0,MatConvertSameType_MPIAIJ,0,0,
1525        0,0,0,
1526        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1527        MatPrintHelp_MPIAIJ,
1528        MatScale_MPIAIJ,0,0,0,
1529        MatGetBlockSize_MPIAIJ,0,0,0,0,
1530        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1531        0,0,MatGetSubMatrix_MPIAIJ};
1532 
1533 
1534 #undef __FUNC__
1535 #define __FUNC__ "MatCreateMPIAIJ"
1536 /*@C
1537    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1538    (the default parallel PETSc format).  For good matrix assembly performance
1539    the user should preallocate the matrix storage by setting the parameters
1540    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1541    performance can be increased by more than a factor of 50.
1542 
1543    Collective on MPI_Comm
1544 
1545    Input Parameters:
1546 +  comm - MPI communicator
1547 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1548            This value should be the same as the local size used in creating the
1549            y vector for the matrix-vector product y = Ax.
1550 .  n - This value should be the same as the local size used in creating the
1551        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1552        calculated if N is given) For square matrices n is almost always m.
1553 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1554 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1555 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1556            (same for all local rows)
1557 .  d_nzz - array containing the number of nonzeros in the various rows of the
1558            diagonal portion of the local submatrix (possibly different for each row)
1559            or PETSC_NULL. You must leave room for the diagonal entry even if
1560            it is zero.
1561 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1562            submatrix (same for all local rows).
1563 -  o_nzz - array containing the number of nonzeros in the various rows of the
1564            off-diagonal portion of the local submatrix (possibly different for
1565            each row) or PETSC_NULL.
1566 
1567    Output Parameter:
1568 .  A - the matrix
1569 
1570    Notes:
1571    The AIJ format (also called the Yale sparse matrix format or
1572    compressed row storage), is fully compatible with standard Fortran 77
1573    storage.  That is, the stored row and column indices can begin at
1574    either one (as in Fortran) or zero.  See the users manual for details.
1575 
1576    The user MUST specify either the local or global matrix dimensions
1577    (possibly both).
1578 
1579    By default, this format uses inodes (identical nodes) when possible.
1580    We search for consecutive rows with the same nonzero structure, thereby
1581    reusing matrix information to achieve increased efficiency.
1582 
1583    Options Database Keys:
1584 +  -mat_aij_no_inode  - Do not use inodes
1585 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1586 -  -mat_aij_oneindex - Internally use indexing starting at 1
1587         rather than 0.  Note that when calling MatSetValues(),
1588         the user still MUST index entries starting at 0!
1589 
1590    Storage Information:
1591    For a square global matrix we define each processor's diagonal portion
1592    to be its local rows and the corresponding columns (a square submatrix);
1593    each processor's off-diagonal portion encompasses the remainder of the
1594    local matrix (a rectangular submatrix).
1595 
1596    The user can specify preallocated storage for the diagonal part of
1597    the local submatrix with either d_nz or d_nnz (not both).  Set
1598    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1599    memory allocation.  Likewise, specify preallocated storage for the
1600    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1601 
1602    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1603    the figure below we depict these three local rows and all columns (0-11).
1604 
1605 .vb
1606              0 1 2 3 4 5 6 7 8 9 10 11
1607             -------------------
1608      row 3  |  o o o d d d o o o o o o
1609      row 4  |  o o o d d d o o o o o o
1610      row 5  |  o o o d d d o o o o o o
1611             -------------------
1612 .ve
1613 
1614    Thus, any entries in the d locations are stored in the d (diagonal)
1615    submatrix, and any entries in the o locations are stored in the
1616    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1617    stored simply in the MATSEQAIJ format for compressed row storage.
1618 
1619    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1620    and o_nz should indicate the number of nonzeros per row in the o matrix.
1621    In general, for PDE problems in which most nonzeros are near the diagonal,
1622    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1623    or you will get TERRIBLE performance; see the users' manual chapter on
1624    matrices.
1625 
1626 .keywords: matrix, aij, compressed row, sparse, parallel
1627 
1628 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1629 @*/
1630 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1631 {
1632   Mat          B;
1633   Mat_MPIAIJ   *b;
1634   int          ierr, i,sum[2],work[2],size;
1635 
1636   PetscFunctionBegin;
1637   MPI_Comm_size(comm,&size);
1638   if (size == 1) {
1639     if (M == PETSC_DECIDE) M = m;
1640     if (N == PETSC_DECIDE) N = n;
1641     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1642     PetscFunctionReturn(0);
1643   }
1644 
1645   *A = 0;
1646   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1647   PLogObjectCreate(B);
1648   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1649   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1650   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1651   B->ops->destroy    = MatDestroy_MPIAIJ;
1652   B->ops->view       = MatView_MPIAIJ;
1653   B->factor     = 0;
1654   B->assembled  = PETSC_FALSE;
1655   B->mapping    = 0;
1656 
1657   B->insertmode = NOT_SET_VALUES;
1658   b->size       = size;
1659   MPI_Comm_rank(comm,&b->rank);
1660 
1661   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1662     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1663   }
1664 
1665   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1666     work[0] = m; work[1] = n;
1667     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1668     if (M == PETSC_DECIDE) M = sum[0];
1669     if (N == PETSC_DECIDE) N = sum[1];
1670   }
1671   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1672   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1673   b->m = m; B->m = m;
1674   b->n = n; B->n = n;
1675   b->N = N; B->N = N;
1676   b->M = M; B->M = M;
1677 
1678   /* build local table of row and column ownerships */
1679   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1680   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1681   b->cowners = b->rowners + b->size + 2;
1682   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1683   b->rowners[0] = 0;
1684   for ( i=2; i<=b->size; i++ ) {
1685     b->rowners[i] += b->rowners[i-1];
1686   }
1687   b->rstart = b->rowners[b->rank];
1688   b->rend   = b->rowners[b->rank+1];
1689   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1690   b->cowners[0] = 0;
1691   for ( i=2; i<=b->size; i++ ) {
1692     b->cowners[i] += b->cowners[i-1];
1693   }
1694   b->cstart = b->cowners[b->rank];
1695   b->cend   = b->cowners[b->rank+1];
1696 
1697   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1698   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1699   PLogObjectParent(B,b->A);
1700   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1701   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1702   PLogObjectParent(B,b->B);
1703 
1704   /* build cache for off array entries formed */
1705   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1706   b->donotstash  = 0;
1707   b->colmap      = 0;
1708   b->garray      = 0;
1709   b->roworiented = 1;
1710 
1711   /* stuff used for matrix vector multiply */
1712   b->lvec      = 0;
1713   b->Mvctx     = 0;
1714 
1715   /* stuff for MatGetRow() */
1716   b->rowindices   = 0;
1717   b->rowvalues    = 0;
1718   b->getrowactive = PETSC_FALSE;
1719 
1720   *A = B;
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNC__
1725 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1726 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1727 {
1728   Mat        mat;
1729   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1730   int        ierr, len=0, flg;
1731 
1732   PetscFunctionBegin;
1733   *newmat       = 0;
1734   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1735   PLogObjectCreate(mat);
1736   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1737   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1738   mat->ops->destroy    = MatDestroy_MPIAIJ;
1739   mat->ops->view       = MatView_MPIAIJ;
1740   mat->factor     = matin->factor;
1741   mat->assembled  = PETSC_TRUE;
1742 
1743   a->m = mat->m   = oldmat->m;
1744   a->n = mat->n   = oldmat->n;
1745   a->M = mat->M   = oldmat->M;
1746   a->N = mat->N   = oldmat->N;
1747 
1748   a->rstart       = oldmat->rstart;
1749   a->rend         = oldmat->rend;
1750   a->cstart       = oldmat->cstart;
1751   a->cend         = oldmat->cend;
1752   a->size         = oldmat->size;
1753   a->rank         = oldmat->rank;
1754   mat->insertmode = NOT_SET_VALUES;
1755   a->rowvalues    = 0;
1756   a->getrowactive = PETSC_FALSE;
1757 
1758   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1759   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1760   a->cowners = a->rowners + a->size + 2;
1761   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1762   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1763   if (oldmat->colmap) {
1764     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1765     PLogObjectMemory(mat,(a->N)*sizeof(int));
1766     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1767   } else a->colmap = 0;
1768   if (oldmat->garray) {
1769     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1770     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1771     PLogObjectMemory(mat,len*sizeof(int));
1772     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1773   } else a->garray = 0;
1774 
1775   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1776   PLogObjectParent(mat,a->lvec);
1777   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1778   PLogObjectParent(mat,a->Mvctx);
1779   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1780   PLogObjectParent(mat,a->A);
1781   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1782   PLogObjectParent(mat,a->B);
1783   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1784   if (flg) {
1785     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1786   }
1787   *newmat = mat;
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 #include "sys.h"
1792 
1793 #undef __FUNC__
1794 #define __FUNC__ "MatLoad_MPIAIJ"
1795 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1796 {
1797   Mat          A;
1798   Scalar       *vals,*svals;
1799   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1800   MPI_Status   status;
1801   int          i, nz, ierr, j,rstart, rend, fd;
1802   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1803   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1804   int          tag = ((PetscObject)viewer)->tag;
1805 
1806   PetscFunctionBegin;
1807   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1808   if (!rank) {
1809     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1810     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1811     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1812     if (header[3] < 0) {
1813       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1814     }
1815   }
1816 
1817 
1818   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1819   M = header[1]; N = header[2];
1820   /* determine ownership of all rows */
1821   m = M/size + ((M % size) > rank);
1822   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1823   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1824   rowners[0] = 0;
1825   for ( i=2; i<=size; i++ ) {
1826     rowners[i] += rowners[i-1];
1827   }
1828   rstart = rowners[rank];
1829   rend   = rowners[rank+1];
1830 
1831   /* distribute row lengths to all processors */
1832   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1833   offlens = ourlens + (rend-rstart);
1834   if (!rank) {
1835     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1836     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1837     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1838     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1839     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1840     PetscFree(sndcounts);
1841   } else {
1842     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1843   }
1844 
1845   if (!rank) {
1846     /* calculate the number of nonzeros on each processor */
1847     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1848     PetscMemzero(procsnz,size*sizeof(int));
1849     for ( i=0; i<size; i++ ) {
1850       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1851         procsnz[i] += rowlengths[j];
1852       }
1853     }
1854     PetscFree(rowlengths);
1855 
1856     /* determine max buffer needed and allocate it */
1857     maxnz = 0;
1858     for ( i=0; i<size; i++ ) {
1859       maxnz = PetscMax(maxnz,procsnz[i]);
1860     }
1861     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1862 
1863     /* read in my part of the matrix column indices  */
1864     nz     = procsnz[0];
1865     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1866     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1867 
1868     /* read in every one elses and ship off */
1869     for ( i=1; i<size; i++ ) {
1870       nz   = procsnz[i];
1871       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1872       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1873     }
1874     PetscFree(cols);
1875   } else {
1876     /* determine buffer space needed for message */
1877     nz = 0;
1878     for ( i=0; i<m; i++ ) {
1879       nz += ourlens[i];
1880     }
1881     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1882 
1883     /* receive message of column indices*/
1884     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1885     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1886     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1887   }
1888 
1889   /* loop over local rows, determining number of off diagonal entries */
1890   PetscMemzero(offlens,m*sizeof(int));
1891   jj = 0;
1892   for ( i=0; i<m; i++ ) {
1893     for ( j=0; j<ourlens[i]; j++ ) {
1894       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1895       jj++;
1896     }
1897   }
1898 
1899   /* create our matrix */
1900   for ( i=0; i<m; i++ ) {
1901     ourlens[i] -= offlens[i];
1902   }
1903   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1904   A = *newmat;
1905   MatSetOption(A,MAT_COLUMNS_SORTED);
1906   for ( i=0; i<m; i++ ) {
1907     ourlens[i] += offlens[i];
1908   }
1909 
1910   if (!rank) {
1911     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1912 
1913     /* read in my part of the matrix numerical values  */
1914     nz = procsnz[0];
1915     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1916 
1917     /* insert into matrix */
1918     jj      = rstart;
1919     smycols = mycols;
1920     svals   = vals;
1921     for ( i=0; i<m; i++ ) {
1922       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1923       smycols += ourlens[i];
1924       svals   += ourlens[i];
1925       jj++;
1926     }
1927 
1928     /* read in other processors and ship out */
1929     for ( i=1; i<size; i++ ) {
1930       nz   = procsnz[i];
1931       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1932       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1933     }
1934     PetscFree(procsnz);
1935   } else {
1936     /* receive numeric values */
1937     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1938 
1939     /* receive message of values*/
1940     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1941     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1942     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1943 
1944     /* insert into matrix */
1945     jj      = rstart;
1946     smycols = mycols;
1947     svals   = vals;
1948     for ( i=0; i<m; i++ ) {
1949       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1950       smycols += ourlens[i];
1951       svals   += ourlens[i];
1952       jj++;
1953     }
1954   }
1955   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1956 
1957   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1958   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNC__
1963 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1964 /*
1965     Not great since it makes two copies of the submatrix, first an SeqAIJ
1966   in local and then by concatenating the local matrices the end result.
1967   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1968 */
1969 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1970 {
1971   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1972   Mat        *local,M, Mreuse;
1973   Scalar     *vwork,*aa;
1974   MPI_Comm   comm = mat->comm;
1975   Mat_SeqAIJ *aij;
1976   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1977 
1978   PetscFunctionBegin;
1979   MPI_Comm_rank(comm,&rank);
1980   MPI_Comm_size(comm,&size);
1981 
1982   if (call ==  MAT_REUSE_MATRIX) {
1983     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1984     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
1985     local = &Mreuse;
1986     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1987   } else {
1988     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1989     Mreuse = *local;
1990     PetscFree(local);
1991   }
1992 
1993   /*
1994       m - number of local rows
1995       n - number of columns (same on all processors)
1996       rstart - first row in new global matrix generated
1997   */
1998   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1999   if (call == MAT_INITIAL_MATRIX) {
2000     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2001     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2002     ii  = aij->i;
2003     jj  = aij->j;
2004 
2005     /*
2006         Determine the number of non-zeros in the diagonal and off-diagonal
2007         portions of the matrix in order to do correct preallocation
2008     */
2009 
2010     /* first get start and end of "diagonal" columns */
2011     if (csize == PETSC_DECIDE) {
2012       nlocal = n/size + ((n % size) > rank);
2013     } else {
2014       nlocal = csize;
2015     }
2016     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2017     rstart = rend - nlocal;
2018     if (rank == size - 1 && rend != n) {
2019       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2020     }
2021 
2022     /* next, compute all the lengths */
2023     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2024     olens = dlens + m;
2025     for ( i=0; i<m; i++ ) {
2026       jend = ii[i+1] - ii[i];
2027       olen = 0;
2028       dlen = 0;
2029       for ( j=0; j<jend; j++ ) {
2030         if ( *jj < rstart || *jj >= rend) olen++;
2031         else dlen++;
2032         jj++;
2033       }
2034       olens[i] = olen;
2035       dlens[i] = dlen;
2036     }
2037     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2038     PetscFree(dlens);
2039   } else {
2040     int ml,nl;
2041 
2042     M = *newmat;
2043     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2044     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2045     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2046     /*
2047          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2048        rather than the slower MatSetValues().
2049     */
2050     M->was_assembled = PETSC_TRUE;
2051     M->assembled     = PETSC_FALSE;
2052   }
2053   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2054   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2055   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2056   ii  = aij->i;
2057   jj  = aij->j;
2058   aa  = aij->a;
2059   for (i=0; i<m; i++) {
2060     row   = rstart + i;
2061     nz    = ii[i+1] - ii[i];
2062     cwork = jj;     jj += nz;
2063     vwork = aa;     aa += nz;
2064     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2065   }
2066 
2067   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2069   *newmat = M;
2070 
2071   /* save submatrix used in processor for next request */
2072   if (call ==  MAT_INITIAL_MATRIX) {
2073     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2074     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2075   }
2076 
2077   PetscFunctionReturn(0);
2078 }
2079