xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0b3b1487d4e2d965bae3bb8a768736965a2af5d0)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.252 1998/06/30 21:10:39 balay Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global number of columns in the matrix.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
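/*
   A small usage sketch (illustrative only; the index values are hypothetical):
   after CreateColmap_MPIAIJ_Private() a global column index g that appears in
   the off-diagonal block B satisfies colmap[g] = loc+1, where loc is its
   position in garray; every other entry of colmap stays 0.  Callers in this
   file therefore test membership as

       col = aij->colmap[g] - 1;
       if (col < 0) {
          g is not yet a column of B; disassemble or raise an error
       } else {
          col is the local column index inside B
       }

   For example, if garray = {3,7,12} then colmap[3] = 1, colmap[7] = 2,
   colmap[12] = 3, and colmap[g] = 0 for all other g.
*/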
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col1) high = t; \
46       else              low  = t; \
47     } \
48       for ( _i=low; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col1) high = t; \
117       else              low  = t; \
118     } \
119       for ( _i=low; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
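/*
   Both macros above perform the same row insertion: a short binary search
   followed by a linear scan locates the column; an existing entry is either
   overwritten or added to (depending on addv); otherwise a slot is opened in
   the row, and when the row is already full the i/j/a arrays are reallocated
   with CHUNKSIZE extra entries (a single fresh malloc plus copies of the old
   data).
*/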
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
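/*
   Caller-side sketch (hedged: the sizes, indices, preallocation counts, and
   two-process layout below are made up for illustration).  Entries whose row
   is owned locally go straight into the A (diagonal) or B (off-diagonal)
   block through the macros above; rows owned by another process are stashed
   and only shipped during assembly.

       Mat    mat;
       int    ierr,row = 0,cols[2] = {0,5};
       Scalar vals[2]  = {1.0,2.0};

       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,5,5,10,10,3,PETSC_NULL,
                              2,PETSC_NULL,&mat);CHKERRQ(ierr);
       ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   On the process owning row 0 this fills A (column 0) and B (column 5); on
   the other process the same call would be stashed and communicated in
   MatAssemblyBegin/End_MPIAIJ() below.
*/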
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         } else {
282           if (!aij->colmap) {
283             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
284           }
285           col = aij->colmap[idxn[j]] - 1;
286           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
287           else {
288             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
289           }
290         }
291       }
292     } else {
293       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
294     }
295   }
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNC__
300 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
301 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
302 {
303   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
304   MPI_Comm    comm = mat->comm;
305   int         size = aij->size, *owners = aij->rowners;
306   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
307   MPI_Request *send_waits,*recv_waits;
308   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
309   InsertMode  addv;
310   Scalar      *rvalues,*svalues;
311 
312   PetscFunctionBegin;
313   if (aij->donotstash) {
314     aij->svalues    = 0; aij->rvalues    = 0;
315     aij->nsends     = 0; aij->nrecvs     = 0;
316     aij->send_waits = 0; aij->recv_waits = 0;
317     aij->rmax       = 0;
318     PetscFunctionReturn(0);
319   }
320 
321   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
322   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
323   if (addv == (ADD_VALUES|INSERT_VALUES)) {
324     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
325   }
326   mat->insertmode = addv; /* in case this processor had no cache */
327 
328   /*  first count number of contributors to each processor */
329   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
330   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
331   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
332   for ( i=0; i<aij->stash.n; i++ ) {
333     idx = aij->stash.idx[i];
334     for ( j=0; j<size; j++ ) {
335       if (idx >= owners[j] && idx < owners[j+1]) {
336         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
337       }
338     }
339   }
340   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
341 
342   /* inform other processors of number of messages and max length*/
343   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
344   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
345   nreceives = work[rank];
346   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
347   nmax = work[rank];
348   PetscFree(work);
349 
350   /* post receives:
351        1) each message will consist of ordered triples
352      (global row index, global column index, value); the indices are
353      stored as Scalars to simplify the message passing.
354        2) since we don't know how long each individual message is we
355      allocate the largest needed buffer for each receive. Potentially
356      this is a lot of wasted space.
357 
358 
359        This could be done better.
360   */
361   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
362   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
363   for ( i=0; i<nreceives; i++ ) {
364     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
365               comm,recv_waits+i);CHKERRQ(ierr);
366   }
367 
368   /* do sends:
369       1) starts[i] gives the starting index in svalues for stuff going to
370          the ith processor
371   */
372   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
373   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
374   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
375   starts[0]  = 0;
376   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
377   for ( i=0; i<aij->stash.n; i++ ) {
378     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
379     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
380     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
381   }
382   PetscFree(owner);
383   starts[0] = 0;
384   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
385   count = 0;
386   for ( i=0; i<size; i++ ) {
387     if (procs[i]) {
388       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
389     }
390   }
391   PetscFree(starts);
392   PetscFree(nprocs);
393 
394   /* Free cache space */
395   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
396   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
397 
398   aij->svalues    = svalues;    aij->rvalues    = rvalues;
399   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
400   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
401   aij->rmax       = nmax;
402 
403   PetscFunctionReturn(0);
404 }
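/*
   Layout of the send buffer built above (a summary, not additional code):
   stashed entries are packed three Scalars at a time, grouped by destination
   process,

       svalues = [ row_0, col_0, val_0, row_1, col_1, val_1, ... ]

   so that the 3*nprocs[p] Scalars starting at svalues+3*starts[p] go to
   process p, and MatAssemblyEnd_MPIAIJ() unpacks the received buffers in the
   same triples.
*/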
405 extern int MatSetUpMultiply_MPIAIJ(Mat);
406 
407 #undef __FUNC__
408 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
409 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
410 {
411   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
412   MPI_Status  *send_status,recv_status;
413   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
414   int         row,col,other_disassembled;
415   Scalar      *values,val;
416   InsertMode  addv = mat->insertmode;
417 
418   PetscFunctionBegin;
419 
420   /*  wait on receives */
421   while (count) {
422     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
423     /* unpack receives into our local space */
424     values = aij->rvalues + 3*imdex*aij->rmax;
425     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
426     n = n/3;
427     for ( i=0; i<n; i++ ) {
428       row = (int) PetscReal(values[3*i]) - aij->rstart;
429       col = (int) PetscReal(values[3*i+1]);
430       val = values[3*i+2];
431       if (col >= aij->cstart && col < aij->cend) {
432         col -= aij->cstart;
433         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
434       } else {
435         if (mat->was_assembled || mat->assembled) {
436           if (!aij->colmap) {
437             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
438           }
439           col = aij->colmap[col] - 1;
440           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
441             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
442             col = (int) PetscReal(values[3*i+1]);
443           }
444         }
445         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
446       }
447     }
448     count--;
449   }
450   if (aij->recv_waits) PetscFree(aij->recv_waits);
451   if (aij->rvalues)    PetscFree(aij->rvalues);
452 
453   /* wait on sends */
454   if (aij->nsends) {
455     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
456     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
457     PetscFree(send_status);
458   }
459   if (aij->send_waits) PetscFree(aij->send_waits);
460   if (aij->svalues)    PetscFree(aij->svalues);
461 
462   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
463   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
464 
465   /* determine if any processor has disassembled, if so we must
466      also disassemble ourselves, in order that we may reassemble. */
467   /*
468      if nonzero structure of submatrix B cannot change then we know that
469      no processor disassembled, and thus we can skip this step
470   */
471   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
472     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
473     if (mat->was_assembled && !other_disassembled) {
474       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
475     }
476   }
477 
478   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
479     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
480   }
481   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
482   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
483 
484   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNC__
489 #define __FUNC__ "MatZeroEntries_MPIAIJ"
490 int MatZeroEntries_MPIAIJ(Mat A)
491 {
492   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
493   int        ierr;
494 
495   PetscFunctionBegin;
496   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
497   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNC__
502 #define __FUNC__ "MatZeroRows_MPIAIJ"
503 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
504 {
505   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
506   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
507   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
508   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
509   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
510   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
511   MPI_Comm       comm = A->comm;
512   MPI_Request    *send_waits,*recv_waits;
513   MPI_Status     recv_status,*send_status;
514   IS             istmp;
515   PetscTruth     localdiag;
516 
517   PetscFunctionBegin;
518   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
519   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
520 
521   /*  first count number of contributors to each processor */
522   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
523   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
524   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
525   for ( i=0; i<N; i++ ) {
526     idx = rows[i];
527     found = 0;
528     for ( j=0; j<size; j++ ) {
529       if (idx >= owners[j] && idx < owners[j+1]) {
530         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
531       }
532     }
533     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
534   }
535   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
536 
537   /* inform other processors of number of messages and max length*/
538   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
539   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
540   nrecvs = work[rank];
541   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
542   nmax = work[rank];
543   PetscFree(work);
544 
545   /* post receives:   */
546   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
547   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
548   for ( i=0; i<nrecvs; i++ ) {
549     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
550   }
551 
552   /* do sends:
553       1) starts[i] gives the starting index in svalues for stuff going to
554          the ith processor
555   */
556   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
557   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
558   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
559   starts[0] = 0;
560   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
561   for ( i=0; i<N; i++ ) {
562     svalues[starts[owner[i]]++] = rows[i];
563   }
564   ISRestoreIndices(is,&rows);
565 
566   starts[0] = 0;
567   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
568   count = 0;
569   for ( i=0; i<size; i++ ) {
570     if (procs[i]) {
571       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
572     }
573   }
574   PetscFree(starts);
575 
576   base = owners[rank];
577 
578   /*  wait on receives */
579   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
580   source = lens + nrecvs;
581   count  = nrecvs; slen = 0;
582   while (count) {
583     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
584     /* unpack receives into our local space */
585     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
586     source[imdex]  = recv_status.MPI_SOURCE;
587     lens[imdex]  = n;
588     slen += n;
589     count--;
590   }
591   PetscFree(recv_waits);
592 
593   /* move the data into the send scatter */
594   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
595   count = 0;
596   for ( i=0; i<nrecvs; i++ ) {
597     values = rvalues + i*nmax;
598     for ( j=0; j<lens[i]; j++ ) {
599       lrows[count++] = values[j] - base;
600     }
601   }
602   PetscFree(rvalues); PetscFree(lens);
603   PetscFree(owner); PetscFree(nprocs);
604 
605   /* actually zap the local rows */
606   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
607   PLogObjectParent(A,istmp);
608 
609   /*
610         Zero the required rows. If the "diagonal block" of the matrix
611      is square and the user wishes to set the diagonal, we use separate
612      code so that MatSetValues() is not called for each diagonal entry,
613      which would allocate new memory and trigger many mallocs, slowing things down.
614 
615        Contributed by: Matthew Knepley
616   */
617   localdiag = PETSC_FALSE;
618   if (diag && (l->A->M == l->A->N)) {
619     localdiag = PETSC_TRUE;
620     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
621   } else {
622     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
623   }
624   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
625   ierr = ISDestroy(istmp); CHKERRQ(ierr);
626 
627   if (diag && (localdiag == PETSC_FALSE)) {
628     for ( i = 0; i < slen; i++ ) {
629       row = lrows[i] + rstart;
630       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
631     }
632     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
633     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634   }
635 
644   PetscFree(lrows);
645 
646   /* wait on sends */
647   if (nsends) {
648     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
649     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
650     PetscFree(send_status);
651   }
652   PetscFree(send_waits); PetscFree(svalues);
653 
654   PetscFunctionReturn(0);
655 }
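/*
   Usage sketch (hedged: the row numbers are hypothetical).  Each process may
   list any global rows; the routine above ships every row number to its
   owner, zeros the row there, and optionally places diag on the diagonal.

       IS     is;
       int    ierr,rows[2] = {0,7};
       Scalar one = 1.0;

       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/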
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatMult_MPIAIJ"
659 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
660 {
661   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
662   int        ierr,nt;
663 
664   PetscFunctionBegin;
665   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
666   if (nt != a->n) {
667     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
668   }
669   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
670   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
671   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
672   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
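/*
   What the routine above computes, written out: with x_loc the locally owned
   part of xx and x_ghost = lvec the scattered off-processor values,

       yy = A_diag * x_loc + B_offdiag * x_ghost

   so the scatter of the ghost values needed by the off-diagonal block is
   overlapped with the local multiply by the diagonal block.
*/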
675 
676 #undef __FUNC__
677 #define __FUNC__ "MatMultAdd_MPIAIJ"
678 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
679 {
680   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
681   int        ierr;
682 
683   PetscFunctionBegin;
684   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
685   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
686   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
687   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNC__
692 #define __FUNC__ "MatMultTrans_MPIAIJ"
693 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
694 {
695   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696   int        ierr;
697 
698   PetscFunctionBegin;
699   /* do nondiagonal part */
700   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
701   /* send it on its way */
702   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
703   /* do local part */
704   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
705   /* receive remote parts: note this assumes the values are not actually */
706   /* added to yy until the VecScatterEnd() below, which is true for this */
707   /* implementation but may not always be true. */
708   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
714 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
715 {
716   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
717   int        ierr;
718 
719   PetscFunctionBegin;
720   /* do nondiagonal part */
721   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
722   /* send it on its way */
723   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
724   /* do local part */
725   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
726   /* receive remote parts: note this assumes the values are not actually */
727   /* added to zz until the VecScatterEnd() below, which is true for this */
728   /* implementation but may not always be true. */
729   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 /*
734   This only works correctly for square matrices where the subblock A->A is the
735    diagonal block
736 */
737 #undef __FUNC__
738 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
739 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
740 {
741   int        ierr;
742   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
743 
744   PetscFunctionBegin;
745   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
746   if (a->rstart != a->cstart || a->rend != a->cend) {
747     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
748   }
749   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNC__
754 #define __FUNC__ "MatScale_MPIAIJ"
755 int MatScale_MPIAIJ(Scalar *aa,Mat A)
756 {
757   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
758   int        ierr;
759 
760   PetscFunctionBegin;
761   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
762   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ "MatDestroy_MPIAIJ"
768 int MatDestroy_MPIAIJ(Mat mat)
769 {
770   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
771   int        ierr;
772 
773   PetscFunctionBegin;
774   if (--mat->refct > 0) PetscFunctionReturn(0);
775 
776   if (mat->mapping) {
777     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
778   }
779   if (mat->bmapping) {
780     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
781   }
782 #if defined(USE_PETSC_LOG)
783   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
784 #endif
785   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
786   PetscFree(aij->rowners);
787   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
788   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
789   if (aij->colmap) PetscFree(aij->colmap);
790   if (aij->garray) PetscFree(aij->garray);
791   if (aij->lvec)   VecDestroy(aij->lvec);
792   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
793   if (aij->rowvalues) PetscFree(aij->rowvalues);
794   PetscFree(aij);
795   PLogObjectDestroy(mat);
796   PetscHeaderDestroy(mat);
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNC__
801 #define __FUNC__ "MatView_MPIAIJ_Binary"
802 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
803 {
804   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
805   int         ierr;
806 
807   PetscFunctionBegin;
808   if (aij->size == 1) {
809     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
810   }
811   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
812   PetscFunctionReturn(0);
813 }
814 
815 #undef __FUNC__
816 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
817 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
818 {
819   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
820   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
821   int         ierr, format,shift = C->indexshift,rank;
822   FILE        *fd;
823   ViewerType  vtype;
824 
825   PetscFunctionBegin;
826   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
827   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
828     ierr = ViewerGetFormat(viewer,&format);
829     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
830       MatInfo info;
831       int     flg;
832       MPI_Comm_rank(mat->comm,&rank);
833       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
834       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
835       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
836       PetscSequentialPhaseBegin(mat->comm,1);
837       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
838          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
839       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
840          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
841       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
842       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
843       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
844       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
845       fflush(fd);
846       PetscSequentialPhaseEnd(mat->comm,1);
847       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
848       PetscFunctionReturn(0);
849     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
850       PetscFunctionReturn(0);
851     }
852   }
853 
854   if (vtype == DRAW_VIEWER) {
855     Draw       draw;
856     PetscTruth isnull;
857     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
858     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
859   }
860 
861   if (vtype == ASCII_FILE_VIEWER) {
862     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
863     PetscSequentialPhaseBegin(mat->comm,1);
864     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
865            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
866            aij->cend);
867     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
868     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
869     fflush(fd);
870     PetscSequentialPhaseEnd(mat->comm,1);
871   } else {
872     int size = aij->size;
873     rank = aij->rank;
874     if (size == 1) {
875       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
876     } else {
877       /* assemble the entire matrix onto first processor. */
878       Mat         A;
879       Mat_SeqAIJ *Aloc;
880       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
881       Scalar      *a;
882 
883       if (!rank) {
884         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
885       } else {
886         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
887       }
888       PLogObjectParent(mat,A);
889 
890       /* copy over the A part */
891       Aloc = (Mat_SeqAIJ*) aij->A->data;
892       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
893       row = aij->rstart;
894       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
895       for ( i=0; i<m; i++ ) {
896         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
897         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
898       }
899       aj = Aloc->j;
900       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
901 
902       /* copy over the B part */
903       Aloc = (Mat_SeqAIJ*) aij->B->data;
904       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
905       row = aij->rstart;
906       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
907       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
908       for ( i=0; i<m; i++ ) {
909         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
910         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
911       }
912       PetscFree(ct);
913       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
914       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
915       /*
916          Everyone has to call to draw the matrix since the graphics waits are
917          synchronized across all processors that share the Draw object
918       */
919       if (!rank || vtype == DRAW_VIEWER) {
920         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
921       }
922       ierr = MatDestroy(A); CHKERRQ(ierr);
923     }
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNC__
929 #define __FUNC__ "MatView_MPIAIJ"
930 int MatView_MPIAIJ(Mat mat,Viewer viewer)
931 {
932   int         ierr;
933   ViewerType  vtype;
934 
935   PetscFunctionBegin;
936   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
937   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
938       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
939     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
940   } else if (vtype == BINARY_FILE_VIEWER) {
941     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
942   } else {
943     SETERRQ(1,1,"Viewer type not supported by PETSc object");
944   }
945   PetscFunctionReturn(0);
946 }
947 
948 /*
949     This has to provide several versions:
950 
951      a) use only local smoothing, updating outer values only once;
952      b) local smoothing, updating outer values each inner iteration;
953      c) coloring, updating outer values between colors.
954 */
955 #undef __FUNC__
956 #define __FUNC__ "MatRelax_MPIAIJ"
957 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
958                            double fshift,int its,Vec xx)
959 {
960   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
961   Mat        AA = mat->A, BB = mat->B;
962   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
963   Scalar     *b,*x,*xs,*ls,d,*v,sum;
964   int        ierr,*idx, *diag;
965   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
966 
967   PetscFunctionBegin;
968   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
969   xs = x + shift; /* shift by one for index start of 1 */
970   ls = ls + shift;
971   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
972   diag = A->diag;
973   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
974     if (flag & SOR_ZERO_INITIAL_GUESS) {
975       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
976       PetscFunctionReturn(0);
977     }
978     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
979     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
980     while (its--) {
981       /* go down through the rows */
982       for ( i=0; i<m; i++ ) {
983         n    = A->i[i+1] - A->i[i];
984         idx  = A->j + A->i[i] + shift;
985         v    = A->a + A->i[i] + shift;
986         sum  = b[i];
987         SPARSEDENSEMDOT(sum,xs,v,idx,n);
988         d    = fshift + A->a[diag[i]+shift];
989         n    = B->i[i+1] - B->i[i];
990         idx  = B->j + B->i[i] + shift;
991         v    = B->a + B->i[i] + shift;
992         SPARSEDENSEMDOT(sum,ls,v,idx,n);
993         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
994       }
995       /* come up through the rows */
996       for ( i=m-1; i>-1; i-- ) {
997         n    = A->i[i+1] - A->i[i];
998         idx  = A->j + A->i[i] + shift;
999         v    = A->a + A->i[i] + shift;
1000         sum  = b[i];
1001         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1002         d    = fshift + A->a[diag[i]+shift];
1003         n    = B->i[i+1] - B->i[i];
1004         idx  = B->j + B->i[i] + shift;
1005         v    = B->a + B->i[i] + shift;
1006         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1007         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1008       }
1009     }
1010   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1011     if (flag & SOR_ZERO_INITIAL_GUESS) {
1012       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1013       PetscFunctionReturn(0);
1014     }
1015     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1016     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1017     while (its--) {
1018       for ( i=0; i<m; i++ ) {
1019         n    = A->i[i+1] - A->i[i];
1020         idx  = A->j + A->i[i] + shift;
1021         v    = A->a + A->i[i] + shift;
1022         sum  = b[i];
1023         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1024         d    = fshift + A->a[diag[i]+shift];
1025         n    = B->i[i+1] - B->i[i];
1026         idx  = B->j + B->i[i] + shift;
1027         v    = B->a + B->i[i] + shift;
1028         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1029         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1030       }
1031     }
1032   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1033     if (flag & SOR_ZERO_INITIAL_GUESS) {
1034       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1035       PetscFunctionReturn(0);
1036     }
1037     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1038                             mat->Mvctx); CHKERRQ(ierr);
1039     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1040                             mat->Mvctx); CHKERRQ(ierr);
1041     while (its--) {
1042       for ( i=m-1; i>-1; i-- ) {
1043         n    = A->i[i+1] - A->i[i];
1044         idx  = A->j + A->i[i] + shift;
1045         v    = A->a + A->i[i] + shift;
1046         sum  = b[i];
1047         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1048         d    = fshift + A->a[diag[i]+shift];
1049         n    = B->i[i+1] - B->i[i];
1050         idx  = B->j + B->i[i] + shift;
1051         v    = B->a + B->i[i] + shift;
1052         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1053         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1054       }
1055     }
1056   } else {
1057     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1058   }
1059   PetscFunctionReturn(0);
1060 }
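/*
   The local update applied in each sweep above, written out under the
   assumption that SPARSEDENSEMDOT(sum,x,v,idx,n) accumulates
   sum -= v[k]*x[idx[k]] for k = 0,...,n-1:

       x_i <- (1 - omega)*x_i
              + omega*( b_i - sum_{j != i} A_ij*x_j - sum_k B_ik*xg_k )
                      / ( A_ii + fshift )

   where xg holds the scattered off-processor values (mat->lvec).  Only the
   locally owned block is relaxed; true parallel SOR is not supported here.
*/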
1061 
1062 #undef __FUNC__
1063 #define __FUNC__ "MatGetInfo_MPIAIJ"
1064 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1065 {
1066   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1067   Mat        A = mat->A, B = mat->B;
1068   int        ierr;
1069   double     isend[5], irecv[5];
1070 
1071   PetscFunctionBegin;
1072   info->block_size     = 1.0;
1073   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1074   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1075   isend[3] = info->memory;  isend[4] = info->mallocs;
1076   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1077   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1078   isend[3] += info->memory;  isend[4] += info->mallocs;
1079   if (flag == MAT_LOCAL) {
1080     info->nz_used      = isend[0];
1081     info->nz_allocated = isend[1];
1082     info->nz_unneeded  = isend[2];
1083     info->memory       = isend[3];
1084     info->mallocs      = isend[4];
1085   } else if (flag == MAT_GLOBAL_MAX) {
1086     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1087     info->nz_used      = irecv[0];
1088     info->nz_allocated = irecv[1];
1089     info->nz_unneeded  = irecv[2];
1090     info->memory       = irecv[3];
1091     info->mallocs      = irecv[4];
1092   } else if (flag == MAT_GLOBAL_SUM) {
1093     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1094     info->nz_used      = irecv[0];
1095     info->nz_allocated = irecv[1];
1096     info->nz_unneeded  = irecv[2];
1097     info->memory       = irecv[3];
1098     info->mallocs      = irecv[4];
1099   }
1100   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1101   info->fill_ratio_needed = 0;
1102   info->factor_mallocs    = 0;
1103   info->rows_global       = (double)mat->M;
1104   info->columns_global    = (double)mat->N;
1105   info->rows_local        = (double)mat->m;
1106   info->columns_local     = (double)mat->n;
1107 
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNC__
1112 #define __FUNC__ "MatSetOption_MPIAIJ"
1113 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1114 {
1115   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1116 
1117   PetscFunctionBegin;
1118   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1119       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1120       op == MAT_COLUMNS_UNSORTED ||
1121       op == MAT_COLUMNS_SORTED ||
1122       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1123       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1124         MatSetOption(a->A,op);
1125         MatSetOption(a->B,op);
1126   } else if (op == MAT_ROW_ORIENTED) {
1127     a->roworiented = 1;
1128     MatSetOption(a->A,op);
1129     MatSetOption(a->B,op);
1130   } else if (op == MAT_ROWS_SORTED ||
1131              op == MAT_ROWS_UNSORTED ||
1132              op == MAT_SYMMETRIC ||
1133              op == MAT_STRUCTURALLY_SYMMETRIC ||
1134              op == MAT_YES_NEW_DIAGONALS)
1135     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1136   else if (op == MAT_COLUMN_ORIENTED) {
1137     a->roworiented = 0;
1138     MatSetOption(a->A,op);
1139     MatSetOption(a->B,op);
1140   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1141     a->donotstash = 1;
1142   } else if (op == MAT_NO_NEW_DIAGONALS){
1143     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1144   } else {
1145     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1146   }
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNC__
1151 #define __FUNC__ "MatGetSize_MPIAIJ"
1152 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1153 {
1154   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1155 
1156   PetscFunctionBegin;
1157   if (m) *m = mat->M;
1158   if (n) *n = mat->N;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNC__
1163 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1164 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1165 {
1166   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1167 
1168   PetscFunctionBegin;
1169   if (m) *m = mat->m;
1170   if (n) *n = mat->n;
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNC__
1175 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1176 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1177 {
1178   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1179 
1180   PetscFunctionBegin;
1181   *m = mat->rstart; *n = mat->rend;
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1186 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1187 
1188 #undef __FUNC__
1189 #define __FUNC__ "MatGetRow_MPIAIJ"
1190 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1191 {
1192   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1193   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1194   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1195   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1196   int        *cmap, *idx_p;
1197 
1198   PetscFunctionBegin;
1199   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1200   mat->getrowactive = PETSC_TRUE;
1201 
1202   if (!mat->rowvalues && (idx || v)) {
1203     /*
1204         allocate enough space to hold information from the longest row.
1205     */
1206     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1207     int     max = 1,m = mat->m,tmp;
1208     for ( i=0; i<m; i++ ) {
1209       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1210       if (max < tmp) { max = tmp; }
1211     }
1212     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1213     CHKPTRQ(mat->rowvalues);
1214     mat->rowindices = (int *) (mat->rowvalues + max);
1215   }
1216 
1217   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1218   lrow = row - rstart;
1219 
1220   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1221   if (!v)   {pvA = 0; pvB = 0;}
1222   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1223   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1224   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1225   nztot = nzA + nzB;
1226 
1227   cmap  = mat->garray;
1228   if (v  || idx) {
1229     if (nztot) {
1230       /* Sort by increasing column numbers, assuming A and B already sorted */
1231       int imark = -1;
1232       if (v) {
1233         *v = v_p = mat->rowvalues;
1234         for ( i=0; i<nzB; i++ ) {
1235           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1236           else break;
1237         }
1238         imark = i;
1239         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1240         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1241       }
1242       if (idx) {
1243         *idx = idx_p = mat->rowindices;
1244         if (imark > -1) {
1245           for ( i=0; i<imark; i++ ) {
1246             idx_p[i] = cmap[cworkB[i]];
1247           }
1248         } else {
1249           for ( i=0; i<nzB; i++ ) {
1250             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1251             else break;
1252           }
1253           imark = i;
1254         }
1255         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1256         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1257       }
1258     }
1259     else {
1260       if (idx) *idx = 0;
1261       if (v)   *v   = 0;
1262     }
1263   }
1264   *nz = nztot;
1265   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1266   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
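/*
   Usage sketch (hedged): only locally owned rows may be requested, and each
   MatGetRow() must be matched by a MatRestoreRow() before the next row is
   fetched (a single row may be active at a time).

       int    ierr,rstart,rend,row,nz,*cols;
       Scalar *vals;

       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use the nz global column indices in cols and values in vals ...
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/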
1269 
1270 #undef __FUNC__
1271 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1272 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1273 {
1274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1275 
1276   PetscFunctionBegin;
1277   if (aij->getrowactive == PETSC_FALSE) {
1278     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1279   }
1280   aij->getrowactive = PETSC_FALSE;
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNC__
1285 #define __FUNC__ "MatNorm_MPIAIJ"
1286 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1287 {
1288   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1289   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1290   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1291   double     sum = 0.0;
1292   Scalar     *v;
1293 
1294   PetscFunctionBegin;
1295   if (aij->size == 1) {
1296     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1297   } else {
1298     if (type == NORM_FROBENIUS) {
1299       v = amat->a;
1300       for (i=0; i<amat->nz; i++ ) {
1301 #if defined(USE_PETSC_COMPLEX)
1302         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1303 #else
1304         sum += (*v)*(*v); v++;
1305 #endif
1306       }
1307       v = bmat->a;
1308       for (i=0; i<bmat->nz; i++ ) {
1309 #if defined(USE_PETSC_COMPLEX)
1310         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1311 #else
1312         sum += (*v)*(*v); v++;
1313 #endif
1314       }
1315       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1316       *norm = sqrt(*norm);
1317     } else if (type == NORM_1) { /* max column norm */
1318       double *tmp, *tmp2;
1319       int    *jj, *garray = aij->garray;
1320       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1321       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1322       PetscMemzero(tmp,aij->N*sizeof(double));
1323       *norm = 0.0;
1324       v = amat->a; jj = amat->j;
1325       for ( j=0; j<amat->nz; j++ ) {
1326         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1327       }
1328       v = bmat->a; jj = bmat->j;
1329       for ( j=0; j<bmat->nz; j++ ) {
1330         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1331       }
1332       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1333       for ( j=0; j<aij->N; j++ ) {
1334         if (tmp2[j] > *norm) *norm = tmp2[j];
1335       }
1336       PetscFree(tmp); PetscFree(tmp2);
1337     } else if (type == NORM_INFINITY) { /* max row norm */
1338       double ntemp = 0.0;
1339       for ( j=0; j<amat->m; j++ ) {
1340         v = amat->a + amat->i[j] + shift;
1341         sum = 0.0;
1342         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1343           sum += PetscAbsScalar(*v); v++;
1344         }
1345         v = bmat->a + bmat->i[j] + shift;
1346         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1347           sum += PetscAbsScalar(*v); v++;
1348         }
1349         if (sum > ntemp) ntemp = sum;
1350       }
1351       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1352     } else {
1353       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1354     }
1355   }
1356   PetscFunctionReturn(0);
1357 }
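/*
   Norms computed above, for reference:
     NORM_FROBENIUS : sqrt( sum_ij |a_ij|^2 ), accumulated locally over the
                      A and B blocks and combined with an MPI_SUM reduction;
     NORM_1         : max over global columns j of sum_i |a_ij|;
     NORM_INFINITY  : max over local rows i of sum_j |a_ij|, combined with an
                      MPI_MAX reduction.
*/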
1358 
1359 #undef __FUNC__
1360 #define __FUNC__ "MatTranspose_MPIAIJ"
1361 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1362 {
1363   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1364   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1365   int        ierr,shift = Aloc->indexshift;
1366   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1367   Mat        B;
1368   Scalar     *array;
1369 
1370   PetscFunctionBegin;
1371   if (matout == PETSC_NULL && M != N) {
1372     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1373   }
1374 
1375   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1376 
1377   /* copy over the A part */
1378   Aloc = (Mat_SeqAIJ*) a->A->data;
1379   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1380   row = a->rstart;
1381   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1382   for ( i=0; i<m; i++ ) {
1383     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1384     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1385   }
1386   aj = Aloc->j;
1387   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1388 
1389   /* copy over the B part */
1390   Aloc = (Mat_SeqAIJ*) a->B->data;
1391   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1392   row = a->rstart;
1393   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1394   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1395   for ( i=0; i<m; i++ ) {
1396     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1397     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1398   }
1399   PetscFree(ct);
1400   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1401   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1402   if (matout != PETSC_NULL) {
1403     *matout = B;
1404   } else {
1405     PetscOps       *Abops;
1406     struct _MatOps *Aops;
1407 
1408     /* This isn't really an in-place transpose, but we free the old data structures from a */
1409     PetscFree(a->rowners);
1410     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1411     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1412     if (a->colmap) PetscFree(a->colmap);
1413     if (a->garray) PetscFree(a->garray);
1414     if (a->lvec) VecDestroy(a->lvec);
1415     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1416     PetscFree(a);
1417 
1418     /*
1419        This is horrible, horrible code. We need to keep
1420       A's pointers for the bops and ops but copy everything
1421       else from B.
1422     */
1423     Abops = A->bops;
1424     Aops  = A->ops;
1425     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1426     A->bops = Abops;
1427     A->ops  = Aops;
1428     PetscHeaderDestroy(B);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNC__
1434 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1435 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1436 {
1437   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1438   Mat a = aij->A, b = aij->B;
1439   int ierr,s1,s2,s3;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1443   if (rr) {
1444     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1445     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1446     /* Overlap communication with computation. */
1447     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1448   }
1449   if (ll) {
1450     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1451     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1452     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1453   }
1454   /* scale the diagonal block */
1455   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1456 
1457   if (rr) {
1458     /* Do a scatter end and then right scale the off-diagonal block */
1459     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1460     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1461   }
1462 
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 
1467 extern int MatPrintHelp_SeqAIJ(Mat);
1468 #undef __FUNC__
1469 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1470 int MatPrintHelp_MPIAIJ(Mat A)
1471 {
1472   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1473   int        ierr;
1474 
1475   PetscFunctionBegin;
1476   if (!a->rank) {
1477     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNC__
1483 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1484 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1485 {
1486   PetscFunctionBegin;
1487   *bs = 1;
1488   PetscFunctionReturn(0);
1489 }
1490 #undef __FUNC__
1491 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1492 int MatSetUnfactored_MPIAIJ(Mat A)
1493 {
1494   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1495   int        ierr;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNC__
1503 #define __FUNC__ "MatEqual_MPIAIJ"
1504 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1505 {
1506   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1507   Mat        a, b, c, d;
1508   PetscTruth flg;
1509   int        ierr;
1510 
1511   PetscFunctionBegin;
1512   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1513   a = matA->A; b = matA->B;
1514   c = matB->A; d = matB->B;
1515 
1516   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1517   if (flg == PETSC_TRUE) {
1518     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1519   }
1520   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1525 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1526 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1527 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1528 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1529 
1530 /* -------------------------------------------------------------------*/
1531 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1532        MatGetRow_MPIAIJ,
1533        MatRestoreRow_MPIAIJ,
1534        MatMult_MPIAIJ,
1535        MatMultAdd_MPIAIJ,
1536        MatMultTrans_MPIAIJ,
1537        MatMultTransAdd_MPIAIJ,
1538        0,
1539        0,
1540        0,
1541        0,
1542        0,
1543        0,
1544        MatRelax_MPIAIJ,
1545        MatTranspose_MPIAIJ,
1546        MatGetInfo_MPIAIJ,
1547        MatEqual_MPIAIJ,
1548        MatGetDiagonal_MPIAIJ,
1549        MatDiagonalScale_MPIAIJ,
1550        MatNorm_MPIAIJ,
1551        MatAssemblyBegin_MPIAIJ,
1552        MatAssemblyEnd_MPIAIJ,
1553        0,
1554        MatSetOption_MPIAIJ,
1555        MatZeroEntries_MPIAIJ,
1556        MatZeroRows_MPIAIJ,
1557        0,
1558        0,
1559        0,
1560        0,
1561        MatGetSize_MPIAIJ,
1562        MatGetLocalSize_MPIAIJ,
1563        MatGetOwnershipRange_MPIAIJ,
1564        0,
1565        0,
1566        0,
1567        0,
1568        MatConvertSameType_MPIAIJ,
1569        0,
1570        0,
1571        0,
1572        0,
1573        0,
1574        MatGetSubMatrices_MPIAIJ,
1575        MatIncreaseOverlap_MPIAIJ,
1576        MatGetValues_MPIAIJ,
1577        0,
1578        MatPrintHelp_MPIAIJ,
1579        MatScale_MPIAIJ,
1580        0,
1581        0,
1582        0,
1583        MatGetBlockSize_MPIAIJ,
1584        0,
1585        0,
1586        0,
1587        0,
1588        MatFDColoringCreate_MPIAIJ,
1589        0,
1590        MatSetUnfactored_MPIAIJ,
1591        0,
1592        0,
1593        MatGetSubMatrix_MPIAIJ,
1594        0,
1595        0,
1596        MatGetMaps_Petsc};
1597 
1598 
1599 #undef __FUNC__
1600 #define __FUNC__ "MatCreateMPIAIJ"
1601 /*@C
1602    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1603    (the default parallel PETSc format).  For good matrix assembly performance
1604    the user should preallocate the matrix storage by setting the parameters
1605    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1606    performance can be increased by more than a factor of 50.
1607 
1608    Collective on MPI_Comm
1609 
1610    Input Parameters:
1611 +  comm - MPI communicator
1612 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1613            This value should be the same as the local size used in creating the
1614            y vector for the matrix-vector product y = Ax.
1615 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1616        This value should be the same as the local size used in creating the
1617        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1618 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1619 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1620 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1621            (same for all local rows)
1622 .  d_nnz - array containing the number of nonzeros in the various rows of the
1623            diagonal portion of the local submatrix (possibly different for each row)
1624            or PETSC_NULL. You must leave room for the diagonal entry even if
1625            it is zero.
1626 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1627            submatrix (same for all local rows).
1628 -  o_nnz - array containing the number of nonzeros in the various rows of the
1629            off-diagonal portion of the local submatrix (possibly different for
1630            each row) or PETSC_NULL.
1631 
1632    Output Parameter:
1633 .  A - the matrix
1634 
1635    Notes:
1636    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1637    then it must be used on all processors that share the object for that argument.
1638 
1639    The AIJ format (also called the Yale sparse matrix format or
1640    compressed row storage) is fully compatible with standard Fortran 77
1641    storage.  That is, the stored row and column indices can begin at
1642    either one (as in Fortran) or zero.  See the users manual for details.
1643 
1644    The user MUST specify either the local or global matrix dimensions
1645    (possibly both).
1646 
1647    By default, this format uses inodes (identical nodes) when possible.
1648    We search for consecutive rows with the same nonzero structure, thereby
1649    reusing matrix information to achieve increased efficiency.
1650 
1651    Options Database Keys:
1652 +  -mat_aij_no_inode  - Do not use inodes
1653 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1654 -  -mat_aij_oneindex - Internally use indexing starting at 1
1655         rather than 0.  Note that when calling MatSetValues(),
1656         the user still MUST index entries starting at 0!
1657 
1658    Storage Information:
1659    For a square global matrix we define each processor's diagonal portion
1660    to be its local rows and the corresponding columns (a square submatrix);
1661    each processor's off-diagonal portion encompasses the remainder of the
1662    local matrix (a rectangular submatrix).
1663 
1664    The user can specify preallocated storage for the diagonal part of
1665    the local submatrix with either d_nz or d_nnz (not both).  Set
1666    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1667    memory allocation.  Likewise, specify preallocated storage for the
1668    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1669 
1670    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1671    the figure below we depict these three local rows and all columns (0-11).
1672 
1673 .vb
1674              0 1 2 3 4 5 6 7 8 9 10 11
1675             -------------------
1676      row 3  |  o o o d d d o o o o o o
1677      row 4  |  o o o d d d o o o o o o
1678      row 5  |  o o o d d d o o o o o o
1679             -------------------
1680 .ve
1681 
1682    Thus, any entries in the d locations are stored in the d (diagonal)
1683    submatrix, and any entries in the o locations are stored in the
1684    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1685    stored simply in the MATSEQAIJ format for compressed row storage.
1686 
1687    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1688    and o_nz should indicate the number of nonzeros per row in the o matrix.
1689    In general, for PDE problems in which most nonzeros are near the diagonal,
1690    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1691    or you will get TERRIBLE performance; see the users' manual chapter on
1692    matrices.
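
   Example usage:
   The sketch below (the communicator, the 12 x 12 global size, and the
   assumption that every position shown in the figure above holds a nonzero
   are illustrative only) creates the matrix depicted above from the
   processor that owns rows 3, 4 and 5, using exact per-row preallocation;
   the other processors make the analogous call with their own local sizes
   and nonzero counts.

.vb
   Mat A;
   int ierr;
   int d_nnz[3] = {3,3,3};    /* 3 nonzeros per local row fall in the diagonal block     */
   int o_nnz[3] = {9,9,9};    /* 9 nonzeros per local row fall in the off-diagonal block */

   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,12,12,
                          0,d_nnz,0,o_nnz,&A); CHKERRQ(ierr);
   /* ... insert entries with MatSetValues(), then assemble ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
.ve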
1693 
1694 .keywords: matrix, aij, compressed row, sparse, parallel
1695 
1696 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1697 @*/
1698 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1699 {
1700   Mat          B;
1701   Mat_MPIAIJ   *b;
1702   int          ierr, i,sum[2],work[2],size;
1703 
1704   PetscFunctionBegin;
1705   MPI_Comm_size(comm,&size);
1706   if (size == 1) {
1707     if (M == PETSC_DECIDE) M = m;
1708     if (N == PETSC_DECIDE) N = n;
1709     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1710     PetscFunctionReturn(0);
1711   }
1712 
1713   *A = 0;
1714   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1715   PLogObjectCreate(B);
1716   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1717   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1718   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1719   B->ops->destroy    = MatDestroy_MPIAIJ;
1720   B->ops->view       = MatView_MPIAIJ;
1721   B->factor     = 0;
1722   B->assembled  = PETSC_FALSE;
1723   B->mapping    = 0;
1724 
1725   B->insertmode = NOT_SET_VALUES;
1726   b->size       = size;
1727   MPI_Comm_rank(comm,&b->rank);
1728 
1729   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1730     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1731   }
1732 
1733   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1734     work[0] = m; work[1] = n;
1735     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1736     if (M == PETSC_DECIDE) M = sum[0];
1737     if (N == PETSC_DECIDE) N = sum[1];
1738   }
1739   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1740   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1741   b->m = m; B->m = m;
1742   b->n = n; B->n = n;
1743   b->N = N; B->N = N;
1744   b->M = M; B->M = M;
1745 
1746   /* build local table of row and column ownerships */
1747   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1748   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1749   b->cowners = b->rowners + b->size + 2;
1750   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1751   b->rowners[0] = 0;
1752   for ( i=2; i<=b->size; i++ ) {
1753     b->rowners[i] += b->rowners[i-1];
1754   }
1755   b->rstart = b->rowners[b->rank];
1756   b->rend   = b->rowners[b->rank+1];
1757   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1758   b->cowners[0] = 0;
1759   for ( i=2; i<=b->size; i++ ) {
1760     b->cowners[i] += b->cowners[i-1];
1761   }
1762   b->cstart = b->cowners[b->rank];
1763   b->cend   = b->cowners[b->rank+1];
1764 
1765   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1766   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1767   PLogObjectParent(B,b->A);
1768   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1769   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1770   PLogObjectParent(B,b->B);
1771 
1772   /* build cache (stash) for off-processor entries generated during assembly */
1773   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1774   b->donotstash  = 0;
1775   b->colmap      = 0;
1776   b->garray      = 0;
1777   b->roworiented = 1;
1778 
1779   /* stuff used for matrix vector multiply */
1780   b->lvec      = 0;
1781   b->Mvctx     = 0;
1782 
1783   /* stuff for MatGetRow() */
1784   b->rowindices   = 0;
1785   b->rowvalues    = 0;
1786   b->getrowactive = PETSC_FALSE;
1787 
1788   *A = B;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 #undef __FUNC__
1793 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1794 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1795 {
1796   Mat        mat;
1797   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1798   int        ierr, len=0, flg;
1799 
1800   PetscFunctionBegin;
1801   *newmat       = 0;
1802   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1803   PLogObjectCreate(mat);
1804   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1805   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1806   mat->ops->destroy    = MatDestroy_MPIAIJ;
1807   mat->ops->view       = MatView_MPIAIJ;
1808   mat->factor     = matin->factor;
1809   mat->assembled  = PETSC_TRUE;
1810 
1811   a->m = mat->m   = oldmat->m;
1812   a->n = mat->n   = oldmat->n;
1813   a->M = mat->M   = oldmat->M;
1814   a->N = mat->N   = oldmat->N;
1815 
1816   a->rstart       = oldmat->rstart;
1817   a->rend         = oldmat->rend;
1818   a->cstart       = oldmat->cstart;
1819   a->cend         = oldmat->cend;
1820   a->size         = oldmat->size;
1821   a->rank         = oldmat->rank;
1822   mat->insertmode = NOT_SET_VALUES;
1823   a->rowvalues    = 0;
1824   a->getrowactive = PETSC_FALSE;
1825 
1826   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1827   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1828   a->cowners = a->rowners + a->size + 2;
1829   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1830   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1831   if (oldmat->colmap) {
1832     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1833     PLogObjectMemory(mat,(a->N)*sizeof(int));
1834     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1835   } else a->colmap = 0;
1836   if (oldmat->garray) {
1837     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1838     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1839     PLogObjectMemory(mat,len*sizeof(int));
1840     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1841   } else a->garray = 0;
1842 
1843   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1844   PLogObjectParent(mat,a->lvec);
1845   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1846   PLogObjectParent(mat,a->Mvctx);
1847   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1848   PLogObjectParent(mat,a->A);
1849   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1850   PLogObjectParent(mat,a->B);
1851   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1852   if (flg) {
1853     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1854   }
1855   *newmat = mat;
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #include "sys.h"
1860 
1861 #undef __FUNC__
1862 #define __FUNC__ "MatLoad_MPIAIJ"
1863 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1864 {
1865   Mat          A;
1866   Scalar       *vals,*svals;
1867   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1868   MPI_Status   status;
1869   int          i, nz, ierr, j,rstart, rend, fd;
1870   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1871   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1872   int          tag = ((PetscObject)viewer)->tag;
1873 
1874   PetscFunctionBegin;
1875   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1876   if (!rank) {
1877     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1878     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1879     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1880     if (header[3] < 0) {
1881       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1882     }
1883   }
1884 
1885   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1886   M = header[1]; N = header[2];
1887   /* determine ownership of all rows */
1888   m = M/size + ((M % size) > rank);
1889   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1890   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1891   rowners[0] = 0;
1892   for ( i=2; i<=size; i++ ) {
1893     rowners[i] += rowners[i-1];
1894   }
1895   rstart = rowners[rank];
1896   rend   = rowners[rank+1];
1897 
1898   /* distribute row lengths to all processors */
1899   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1900   offlens = ourlens + (rend-rstart);
1901   if (!rank) {
1902     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1903     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1904     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1905     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1906     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1907     PetscFree(sndcounts);
1908   } else {
1909     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1910   }
1911 
1912   if (!rank) {
1913     /* calculate the number of nonzeros on each processor */
1914     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1915     PetscMemzero(procsnz,size*sizeof(int));
1916     for ( i=0; i<size; i++ ) {
1917       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1918         procsnz[i] += rowlengths[j];
1919       }
1920     }
1921     PetscFree(rowlengths);
1922 
1923     /* determine max buffer needed and allocate it */
1924     maxnz = 0;
1925     for ( i=0; i<size; i++ ) {
1926       maxnz = PetscMax(maxnz,procsnz[i]);
1927     }
1928     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1929 
1930     /* read in my part of the matrix column indices  */
1931     nz     = procsnz[0];
1932     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1933     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1934 
1935     /* read in everyone else's part and ship it off */
1936     for ( i=1; i<size; i++ ) {
1937       nz   = procsnz[i];
1938       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1939       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1940     }
1941     PetscFree(cols);
1942   } else {
1943     /* determine buffer space needed for message */
1944     nz = 0;
1945     for ( i=0; i<m; i++ ) {
1946       nz += ourlens[i];
1947     }
1948     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1949 
1950     /* receive message of column indices */
1951     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1952     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1953     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1954   }
1955 
1956   /* loop over local rows, determining the number of off-diagonal entries */
1957   PetscMemzero(offlens,m*sizeof(int));
1958   jj = 0;
1959   for ( i=0; i<m; i++ ) {
1960     for ( j=0; j<ourlens[i]; j++ ) {
1961       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1962       jj++;
1963     }
1964   }
1965 
1966   /* create our matrix */
1967   for ( i=0; i<m; i++ ) {
1968     ourlens[i] -= offlens[i];
1969   }
1970   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1971   A = *newmat;
1972   MatSetOption(A,MAT_COLUMNS_SORTED);
1973   for ( i=0; i<m; i++ ) {
1974     ourlens[i] += offlens[i];
1975   }
1976 
1977   if (!rank) {
1978     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1979 
1980     /* read in my part of the matrix numerical values  */
1981     nz = procsnz[0];
1982     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1983 
1984     /* insert into matrix */
1985     jj      = rstart;
1986     smycols = mycols;
1987     svals   = vals;
1988     for ( i=0; i<m; i++ ) {
1989       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1990       smycols += ourlens[i];
1991       svals   += ourlens[i];
1992       jj++;
1993     }
1994 
1995     /* read in the other processors' parts and ship them out */
1996     for ( i=1; i<size; i++ ) {
1997       nz   = procsnz[i];
1998       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1999       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2000     }
2001     PetscFree(procsnz);
2002   } else {
2003     /* receive numeric values */
2004     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2005 
2006     /* receive message of values */
2007     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2008     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2009     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2010 
2011     /* insert into matrix */
2012     jj      = rstart;
2013     smycols = mycols;
2014     svals   = vals;
2015     for ( i=0; i<m; i++ ) {
2016       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2017       smycols += ourlens[i];
2018       svals   += ourlens[i];
2019       jj++;
2020     }
2021   }
2022   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2023 
2024   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2025   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNC__
2030 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2031 /*
2032     Not great since it makes two copies of the submatrix: first a SeqAIJ
2033   matrix on each process, and then the end result obtained by concatenating
2034   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2035 */
2036 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
2037 {
2038   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2039   Mat        *local,M, Mreuse;
2040   Scalar     *vwork,*aa;
2041   MPI_Comm   comm = mat->comm;
2042   Mat_SeqAIJ *aij;
2043   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2044 
2045   PetscFunctionBegin;
2046   MPI_Comm_rank(comm,&rank);
2047   MPI_Comm_size(comm,&size);
2048 
2049   if (call ==  MAT_REUSE_MATRIX) {
2050     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2051     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2052     local = &Mreuse;
2053     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2054   } else {
2055     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2056     Mreuse = *local;
2057     PetscFree(local);
2058   }
2059 
2060   /*
2061       m - number of local rows
2062       n - number of columns (same on all processors)
2063       rstart - first row in new global matrix generated
2064   */
2065   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2066   if (call == MAT_INITIAL_MATRIX) {
2067     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2068     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2069     ii  = aij->i;
2070     jj  = aij->j;
2071 
2072     /*
2073         Determine the number of non-zeros in the diagonal and off-diagonal
2074         portions of the matrix in order to do correct preallocation
2075     */
2076 
2077     /* first get start and end of "diagonal" columns */
2078     if (csize == PETSC_DECIDE) {
2079       nlocal = n/size + ((n % size) > rank);
2080     } else {
2081       nlocal = csize;
2082     }
2083     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2084     rstart = rend - nlocal;
2085     if (rank == size - 1 && rend != n) {
2086       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2087     }
2088 
2089     /* next, compute all the lengths */
2090     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2091     olens = dlens + m;
2092     for ( i=0; i<m; i++ ) {
2093       jend = ii[i+1] - ii[i];
2094       olen = 0;
2095       dlen = 0;
2096       for ( j=0; j<jend; j++ ) {
2097         if ( *jj < rstart || *jj >= rend) olen++;
2098         else dlen++;
2099         jj++;
2100       }
2101       olens[i] = olen;
2102       dlens[i] = dlen;
2103     }
2104     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2105     PetscFree(dlens);
2106   } else {
2107     int ml,nl;
2108 
2109     M = *newmat;
2110     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2111     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2112     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2113     /*
2114          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2115        rather than the slower MatSetValues().
2116     */
2117     M->was_assembled = PETSC_TRUE;
2118     M->assembled     = PETSC_FALSE;
2119   }
2120   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2121   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2122   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2123   ii  = aij->i;
2124   jj  = aij->j;
2125   aa  = aij->a;
2126   for (i=0; i<m; i++) {
2127     row   = rstart + i;
2128     nz    = ii[i+1] - ii[i];
2129     cwork = jj;     jj += nz;
2130     vwork = aa;     aa += nz;
2131     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2132   }
2133 
2134   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2136   *newmat = M;
2137 
2138   /* save the submatrix used on this processor for a later reuse request */
2139   if (call ==  MAT_INITIAL_MATRIX) {
2140     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2141     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2142   }
2143 
2144   PetscFunctionReturn(0);
2145 }
2146