xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 27508adb5a3bba3f14f8ccd19508af55ddcc5f79)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.269 1998/12/23 19:31:15 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 #undef __FUNC__
15 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 
22   PetscFunctionBegin;
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   PetscFunctionReturn(0);
28 }
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #define CHUNKSIZE   15
33 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
34 { \
35  \
36     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
37     rmax = aimax[row]; nrow = ailen[row];  \
38     col1 = col - shift; \
39      \
40     low = 0; high = nrow; \
41     while (high-low > 5) { \
42       t = (low+high)/2; \
43       if (rp[t] > col) high = t; \
44       else             low  = t; \
45     } \
46       for ( _i=0; _i<nrow; _i++ ) { \
47         if (rp[_i] > col1) break; \
48         if (rp[_i] == col1) { \
49           if (addv == ADD_VALUES) ap[_i] += value;   \
50           else                  ap[_i] = value; \
51           goto a_noinsert; \
52         } \
53       }  \
54       if (nonew == 1) goto a_noinsert; \
55       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
56       if (nrow >= rmax) { \
57         /* there is no extra room in row, therefore enlarge */ \
58         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
59         Scalar *new_a; \
60  \
61         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
62  \
63         /* malloc new storage space */ \
64         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
65         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
66         new_j   = (int *) (new_a + new_nz); \
67         new_i   = new_j + new_nz; \
68  \
69         /* copy over old data into new slots */ \
70         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
71         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
72         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
73         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
74         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
75                                                            len*sizeof(int)); \
76         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
77         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
78                                                            len*sizeof(Scalar));  \
79         /* free up old matrix storage */ \
80  \
81         PetscFree(a->a);  \
82         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
83         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
84         a->singlemalloc = 1; \
85  \
86         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
87         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
88         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
89         a->maxnz += CHUNKSIZE; \
90         a->reallocs++; \
91       } \
92       N = nrow++ - 1; a->nz++; \
93       /* shift up all the later entries in this row */ \
94       for ( ii=N; ii>=_i; ii-- ) { \
95         rp[ii+1] = rp[ii]; \
96         ap[ii+1] = ap[ii]; \
97       } \
98       rp[_i] = col1;  \
99       ap[_i] = value;  \
100       a_noinsert: ; \
101       ailen[row] = nrow; \
102 }
103 
104 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
105 { \
106  \
107     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
108     rmax = bimax[row]; nrow = bilen[row];  \
109     col1 = col - shift; \
110      \
111     low = 0; high = nrow; \
112     while (high-low > 5) { \
113       t = (low+high)/2; \
114       if (rp[t] > col) high = t; \
115       else             low  = t; \
116     } \
117        for ( _i=0; _i<nrow; _i++ ) { \
118         if (rp[_i] > col1) break; \
119         if (rp[_i] == col1) { \
120           if (addv == ADD_VALUES) ap[_i] += value;   \
121           else                  ap[_i] = value; \
122           goto b_noinsert; \
123         } \
124       }  \
125       if (nonew == 1) goto b_noinsert; \
126       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
127       if (nrow >= rmax) { \
128         /* there is no extra room in row, therefore enlarge */ \
129         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
130         Scalar *new_a; \
131  \
132         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
133  \
134         /* malloc new storage space */ \
135         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
136         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
137         new_j   = (int *) (new_a + new_nz); \
138         new_i   = new_j + new_nz; \
139  \
140         /* copy over old data into new slots */ \
141         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
142         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
143         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
144         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
145         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
146                                                            len*sizeof(int)); \
147         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
148         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
149                                                            len*sizeof(Scalar));  \
150         /* free up old matrix storage */ \
151  \
152         PetscFree(b->a);  \
153         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
154         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
155         b->singlemalloc = 1; \
156  \
157         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
158         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
159         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
160         b->maxnz += CHUNKSIZE; \
161         b->reallocs++; \
162       } \
163       N = nrow++ - 1; b->nz++; \
164       /* shift up all the later entries in this row */ \
165       for ( ii=N; ii>=_i; ii-- ) { \
166         rp[ii+1] = rp[ii]; \
167         ap[ii+1] = ap[ii]; \
168       } \
169       rp[_i] = col1;  \
170       ap[_i] = value;  \
171       b_noinsert: ; \
172       bilen[row] = nrow; \
173 }
174 
175 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
176 #undef __FUNC__
177 #define __FUNC__ "MatSetValues_MPIAIJ"
178 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
179 {
180   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
181   Scalar     value;
182   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
183   int        cstart = aij->cstart, cend = aij->cend,row,col;
184   int        roworiented = aij->roworiented;
185 
186   /* Some Variables required in the macro */
187   Mat        A = aij->A;
188   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
189   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
190   Scalar     *aa = a->a;
191 
192   Mat        B = aij->B;
193   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
194   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
195   Scalar     *ba = b->a;
196 
197   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
198   int        nonew = a->nonew,shift = a->indexshift;
199   Scalar     *ap;
200 
201   PetscFunctionBegin;
202   for ( i=0; i<m; i++ ) {
203 #if defined(USE_PETSC_BOPT_g)
204     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
205     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
206 #endif
207     if (im[i] >= rstart && im[i] < rend) {
208       row = im[i] - rstart;
209       for ( j=0; j<n; j++ ) {
210         if (in[j] >= cstart && in[j] < cend){
211           col = in[j] - cstart;
212           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
213           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
214           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
215         }
216 #if defined(USE_PETSC_BOPT_g)
217         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
218         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
219 #endif
220         else {
221           if (mat->was_assembled) {
222             if (!aij->colmap) {
223               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
224             }
225             col = aij->colmap[in[j]] - 1;
226             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
227               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
228               col =  in[j];
229               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
230               B = aij->B;
231               b = (Mat_SeqAIJ *) B->data;
232               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
233               ba = b->a;
234             }
235           } else col = in[j];
236           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
237           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
238           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
239         }
240       }
241     }
242     else {
243       if (roworiented && !aij->donotstash) {
244         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
245       }
246       else {
247         if (!aij->donotstash) {
248           row = im[i];
249           for ( j=0; j<n; j++ ) {
250             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
251           }
252         }
253       }
254     }
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNC__
260 #define __FUNC__ "MatGetValues_MPIAIJ"
261 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
262 {
263   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
264   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
265   int        cstart = aij->cstart, cend = aij->cend,row,col;
266 
267   PetscFunctionBegin;
268   for ( i=0; i<m; i++ ) {
269     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
270     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
271     if (idxm[i] >= rstart && idxm[i] < rend) {
272       row = idxm[i] - rstart;
273       for ( j=0; j<n; j++ ) {
274         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
275         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
276         if (idxn[j] >= cstart && idxn[j] < cend){
277           col = idxn[j] - cstart;
278           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
279         } else {
280           if (!aij->colmap) {
281             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
282           }
283           col = aij->colmap[idxn[j]] - 1;
284           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
285           else {
286             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
287           }
288         }
289       }
290     } else {
291       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
292     }
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNC__
298 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
299 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
300 {
301   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
302   MPI_Comm    comm = mat->comm;
303   int         size = aij->size, *owners = aij->rowners;
304   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
305   MPI_Request *send_waits,*recv_waits;
306   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
307   InsertMode  addv;
308   Scalar      *rvalues,*svalues;
309 
310   PetscFunctionBegin;
311   if (aij->donotstash) {
312     aij->svalues    = 0; aij->rvalues    = 0;
313     aij->nsends     = 0; aij->nrecvs     = 0;
314     aij->send_waits = 0; aij->recv_waits = 0;
315     aij->rmax       = 0;
316     PetscFunctionReturn(0);
317   }
318 
319   /* make sure all processors are either in INSERTMODE or ADDMODE */
320   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
321   if (addv == (ADD_VALUES|INSERT_VALUES)) {
322     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
323   }
324   mat->insertmode = addv; /* in case this processor had no cache */
325 
326   /*  first count number of contributors to each processor */
327   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
328   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
329   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
330   for ( i=0; i<aij->stash.n; i++ ) {
331     idx = aij->stash.idx[i];
332     for ( j=0; j<size; j++ ) {
333       if (idx >= owners[j] && idx < owners[j+1]) {
334         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
335       }
336     }
337   }
338   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
339 
340   /* inform other processors of number of messages and max length*/
341   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
342   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
343   nreceives = work[rank];
344   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
345   nmax = work[rank];
346   PetscFree(work);
347 
348   /* post receives:
349        1) each message will consist of ordered pairs
350      (global index,value) we store the global index as a double
351      to simplify the message passing.
352        2) since we don't know how long each individual message is we
353      allocate the largest needed buffer for each receive. Potentially
354      this is a lot of wasted space.
355 
356 
357        This could be done better.
358   */
359   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
360   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
361   for ( i=0; i<nreceives; i++ ) {
362     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
363               comm,recv_waits+i);CHKERRQ(ierr);
364   }
365 
366   /* do sends:
367       1) starts[i] gives the starting index in svalues for stuff going to
368          the ith processor
369   */
370   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
371   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
372   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
373   starts[0]  = 0;
374   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
375   for ( i=0; i<aij->stash.n; i++ ) {
376     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
377     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
378     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
379   }
380   PetscFree(owner);
381   starts[0] = 0;
382   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
383   count = 0;
384   for ( i=0; i<size; i++ ) {
385     if (procs[i]) {
386       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
387     }
388   }
389   PetscFree(starts);
390   PetscFree(nprocs);
391 
392   /* Free cache space */
393   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
394   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
395 
396   aij->svalues    = svalues;    aij->rvalues    = rvalues;
397   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
398   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
399   aij->rmax       = nmax;
400 
401   PetscFunctionReturn(0);
402 }
403 extern int MatSetUpMultiply_MPIAIJ(Mat);
404 
405 #undef __FUNC__
406 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
407 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
408 {
409   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
410   MPI_Status  *send_status,recv_status;
411   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
412   int         row,col,other_disassembled;
413   Scalar      *values,val;
414   InsertMode  addv = mat->insertmode;
415 
416   PetscFunctionBegin;
417 
418   /*  wait on receives */
419   while (count) {
420     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
421     /* unpack receives into our local space */
422     values = aij->rvalues + 3*imdex*aij->rmax;
423     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
424     n = n/3;
425     for ( i=0; i<n; i++ ) {
426       row = (int) PetscReal(values[3*i]) - aij->rstart;
427       col = (int) PetscReal(values[3*i+1]);
428       val = values[3*i+2];
429       if (col >= aij->cstart && col < aij->cend) {
430         col -= aij->cstart;
431         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
432       } else {
433         if (mat->was_assembled || mat->assembled) {
434           if (!aij->colmap) {
435             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
436           }
437           col = aij->colmap[col] - 1;
438           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
439             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
440             col = (int) PetscReal(values[3*i+1]);
441           }
442         }
443         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
444       }
445     }
446     count--;
447   }
448   if (aij->recv_waits) PetscFree(aij->recv_waits);
449   if (aij->rvalues)    PetscFree(aij->rvalues);
450 
451   /* wait on sends */
452   if (aij->nsends) {
453     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
454     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
455     PetscFree(send_status);
456   }
457   if (aij->send_waits) PetscFree(aij->send_waits);
458   if (aij->svalues)    PetscFree(aij->svalues);
459 
460   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
461   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
462 
463   /* determine if any processor has disassembled, if so we must
464      also disassemble ourselfs, in order that we may reassemble. */
465   /*
466      if nonzero structure of submatrix B cannot change then we know that
467      no processor disassembled thus we can skip this stuff
468   */
469   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
470     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
471     if (mat->was_assembled && !other_disassembled) {
472       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
473     }
474   }
475 
476   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
477     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
478   }
479   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
480   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
481 
482   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
483   PetscFunctionReturn(0);
484 }
485 
486 #undef __FUNC__
487 #define __FUNC__ "MatZeroEntries_MPIAIJ"
488 int MatZeroEntries_MPIAIJ(Mat A)
489 {
490   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
491   int        ierr;
492 
493   PetscFunctionBegin;
494   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
495   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNC__
500 #define __FUNC__ "MatZeroRows_MPIAIJ"
501 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
502 {
503   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
504   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
505   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
506   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
507   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
508   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
509   MPI_Comm       comm = A->comm;
510   MPI_Request    *send_waits,*recv_waits;
511   MPI_Status     recv_status,*send_status;
512   IS             istmp;
513   PetscTruth     localdiag;
514 
515   PetscFunctionBegin;
516   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
517   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
518 
519   /*  first count number of contributors to each processor */
520   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
521   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
522   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
523   for ( i=0; i<N; i++ ) {
524     idx = rows[i];
525     found = 0;
526     for ( j=0; j<size; j++ ) {
527       if (idx >= owners[j] && idx < owners[j+1]) {
528         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
529       }
530     }
531     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
532   }
533   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
534 
535   /* inform other processors of number of messages and max length*/
536   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
537   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
538   nrecvs = work[rank];
539   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
540   nmax = work[rank];
541   PetscFree(work);
542 
543   /* post receives:   */
544   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
545   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
546   for ( i=0; i<nrecvs; i++ ) {
547     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
548   }
549 
550   /* do sends:
551       1) starts[i] gives the starting index in svalues for stuff going to
552          the ith processor
553   */
554   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
555   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
556   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
557   starts[0] = 0;
558   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
559   for ( i=0; i<N; i++ ) {
560     svalues[starts[owner[i]]++] = rows[i];
561   }
562   ISRestoreIndices(is,&rows);
563 
564   starts[0] = 0;
565   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
566   count = 0;
567   for ( i=0; i<size; i++ ) {
568     if (procs[i]) {
569       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
570     }
571   }
572   PetscFree(starts);
573 
574   base = owners[rank];
575 
576   /*  wait on receives */
577   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
578   source = lens + nrecvs;
579   count  = nrecvs; slen = 0;
580   while (count) {
581     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
582     /* unpack receives into our local space */
583     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
584     source[imdex]  = recv_status.MPI_SOURCE;
585     lens[imdex]  = n;
586     slen += n;
587     count--;
588   }
589   PetscFree(recv_waits);
590 
591   /* move the data into the send scatter */
592   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
593   count = 0;
594   for ( i=0; i<nrecvs; i++ ) {
595     values = rvalues + i*nmax;
596     for ( j=0; j<lens[i]; j++ ) {
597       lrows[count++] = values[j] - base;
598     }
599   }
600   PetscFree(rvalues); PetscFree(lens);
601   PetscFree(owner); PetscFree(nprocs);
602 
603   /* actually zap the local rows */
604   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
605   PLogObjectParent(A,istmp);
606 
607   /*
608         Zero the required rows. If the "diagonal block" of the matrix
609      is square and the user wishes to set the diagonal we use seperate
610      code so that MatSetValues() is not called for each diagonal allocating
611      new memory, thus calling lots of mallocs and slowing things down.
612 
613        Contributed by: Mathew Knepley
614   */
615   localdiag = PETSC_FALSE;
616   if (diag && (l->A->M == l->A->N)) {
617     localdiag = PETSC_TRUE;
618     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
619   } else {
620     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
621   }
622   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
623   ierr = ISDestroy(istmp); CHKERRQ(ierr);
624 
625   if (diag && (localdiag == PETSC_FALSE)) {
626     for ( i = 0; i < slen; i++ ) {
627       row = lrows[i] + rstart;
628       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
629     }
630     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
631     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
632   }
633 
634   if (diag) {
635     for ( i = 0; i < slen; i++ ) {
636       row = lrows[i] + rstart;
637       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
638     }
639     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
640     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
641   }
642   PetscFree(lrows);
643 
644   /* wait on sends */
645   if (nsends) {
646     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
647     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
648     PetscFree(send_status);
649   }
650   PetscFree(send_waits); PetscFree(svalues);
651 
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNC__
656 #define __FUNC__ "MatMult_MPIAIJ"
657 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
658 {
659   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
660   int        ierr,nt;
661 
662   PetscFunctionBegin;
663   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
664   if (nt != a->n) {
665     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
666   }
667   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
668   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
669   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
670   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatMultAdd_MPIAIJ"
676 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
683   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
684   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
685   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNC__
690 #define __FUNC__ "MatMultTrans_MPIAIJ"
691 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
692 {
693   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
694   int        ierr;
695 
696   PetscFunctionBegin;
697   /* do nondiagonal part */
698   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
699   /* send it on its way */
700   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
701   /* do local part */
702   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
703   /* receive remote parts: note this assumes the values are not actually */
704   /* inserted in yy until the next line, which is true for my implementation*/
705   /* but is not perhaps always true. */
706   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNC__
711 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
712 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
715   int        ierr;
716 
717   PetscFunctionBegin;
718   /* do nondiagonal part */
719   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
720   /* send it on its way */
721   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
722   /* do local part */
723   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
724   /* receive remote parts: note this assumes the values are not actually */
725   /* inserted in yy until the next line, which is true for my implementation*/
726   /* but is not perhaps always true. */
727   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 /*
732   This only works correctly for square matrices where the subblock A->A is the
733    diagonal block
734 */
735 #undef __FUNC__
736 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
737 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
738 {
739   int        ierr;
740   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
741 
742   PetscFunctionBegin;
743   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
744   if (a->rstart != a->cstart || a->rend != a->cend) {
745     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
746   }
747   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNC__
752 #define __FUNC__ "MatScale_MPIAIJ"
753 int MatScale_MPIAIJ(Scalar *aa,Mat A)
754 {
755   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
756   int        ierr;
757 
758   PetscFunctionBegin;
759   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
760   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNC__
765 #define __FUNC__ "MatDestroy_MPIAIJ"
766 int MatDestroy_MPIAIJ(Mat mat)
767 {
768   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
769   int        ierr;
770 
771   PetscFunctionBegin;
772   if (--mat->refct > 0) PetscFunctionReturn(0);
773 
774   if (mat->mapping) {
775     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
776   }
777   if (mat->bmapping) {
778     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
779   }
780   if (mat->rmap) {
781     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
782   }
783   if (mat->cmap) {
784     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
785   }
786 #if defined(USE_PETSC_LOG)
787   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
788 #endif
789   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
790   PetscFree(aij->rowners);
791   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
792   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
793   if (aij->colmap) PetscFree(aij->colmap);
794   if (aij->garray) PetscFree(aij->garray);
795   if (aij->lvec)   VecDestroy(aij->lvec);
796   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
797   if (aij->rowvalues) PetscFree(aij->rowvalues);
798   PetscFree(aij);
799   PLogObjectDestroy(mat);
800   PetscHeaderDestroy(mat);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNC__
805 #define __FUNC__ "MatView_MPIAIJ_Binary"
806 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
807 {
808   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
809   int         ierr;
810 
811   PetscFunctionBegin;
812   if (aij->size == 1) {
813     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
814   }
815   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNC__
820 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
821 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
822 {
823   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
824   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
825   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
826   int         size = aij->size;
827   FILE        *fd;
828   ViewerType  vtype;
829 
830   PetscFunctionBegin;
831   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
832   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
833     ierr = ViewerGetFormat(viewer,&format);
834     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
835       MatInfo info;
836       int     flg;
837       MPI_Comm_rank(mat->comm,&rank);
838       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
839       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
840       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
841       PetscSequentialPhaseBegin(mat->comm,1);
842       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
843          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
844       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
845          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
846       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
847       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
848       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
849       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
850       fflush(fd);
851       PetscSequentialPhaseEnd(mat->comm,1);
852       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
853       PetscFunctionReturn(0);
854     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
855       PetscFunctionReturn(0);
856     }
857   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
858     Draw       draw;
859     PetscTruth isnull;
860     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
861     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
862   }
863 
864   if (size == 1) {
865     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
866   } else {
867     /* assemble the entire matrix onto first processor. */
868     Mat         A;
869     Mat_SeqAIJ *Aloc;
870     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
871     Scalar      *a;
872 
873     if (!rank) {
874       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
875     } else {
876       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
877     }
878     PLogObjectParent(mat,A);
879 
880     /* copy over the A part */
881     Aloc = (Mat_SeqAIJ*) aij->A->data;
882     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
883     row = aij->rstart;
884     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
885     for ( i=0; i<m; i++ ) {
886       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
887       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
888     }
889     aj = Aloc->j;
890     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
891 
892     /* copy over the B part */
893     Aloc = (Mat_SeqAIJ*) aij->B->data;
894     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
895     row = aij->rstart;
896     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
897     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
898     for ( i=0; i<m; i++ ) {
899       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
900       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
901     }
902     PetscFree(ct);
903     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
904     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
905     /*
906        Everyone has to call to draw the matrix since the graphics waits are
907        synchronized across all processors that share the Draw object
908     */
909     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
910       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
911     }
912     ierr = MatDestroy(A); CHKERRQ(ierr);
913   }
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNC__
918 #define __FUNC__ "MatView_MPIAIJ"
919 int MatView_MPIAIJ(Mat mat,Viewer viewer)
920 {
921   int         ierr;
922   ViewerType  vtype;
923 
924   PetscFunctionBegin;
925   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
926   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
927       PetscTypeCompare(vtype,MATLAB_VIEWER)) {
928     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
929   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
930     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
931   } else {
932     SETERRQ(1,1,"Viewer type not supported by PETSc object");
933   }
934   PetscFunctionReturn(0);
935 }
936 
937 /*
938     This has to provide several versions.
939 
940      2) a) use only local smoothing updating outer values only once.
941         b) local smoothing updating outer values each inner iteration
942      3) color updating out values betwen colors.
943 */
944 #undef __FUNC__
945 #define __FUNC__ "MatRelax_MPIAIJ"
946 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
947                            double fshift,int its,Vec xx)
948 {
949   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
950   Mat        AA = mat->A, BB = mat->B;
951   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
952   Scalar     *b,*x,*xs,*ls,d,*v,sum;
953   int        ierr,*idx, *diag;
954   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
955 
956   PetscFunctionBegin;
957   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
958   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
959   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
960   xs = x + shift; /* shift by one for index start of 1 */
961   ls = ls + shift;
962   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
963   diag = A->diag;
964   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
965     if (flag & SOR_ZERO_INITIAL_GUESS) {
966       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
967       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
968       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
969       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     }
972     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
973     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
974     while (its--) {
975       /* go down through the rows */
976       for ( i=0; i<m; i++ ) {
977         n    = A->i[i+1] - A->i[i];
978 	PLogFlops(4*n+3);
979         idx  = A->j + A->i[i] + shift;
980         v    = A->a + A->i[i] + shift;
981         sum  = b[i];
982         SPARSEDENSEMDOT(sum,xs,v,idx,n);
983         d    = fshift + A->a[diag[i]+shift];
984         n    = B->i[i+1] - B->i[i];
985         idx  = B->j + B->i[i] + shift;
986         v    = B->a + B->i[i] + shift;
987         SPARSEDENSEMDOT(sum,ls,v,idx,n);
988         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
989       }
990       /* come up through the rows */
991       for ( i=m-1; i>-1; i-- ) {
992         n    = A->i[i+1] - A->i[i];
993 	PLogFlops(4*n+3)
994         idx  = A->j + A->i[i] + shift;
995         v    = A->a + A->i[i] + shift;
996         sum  = b[i];
997         SPARSEDENSEMDOT(sum,xs,v,idx,n);
998         d    = fshift + A->a[diag[i]+shift];
999         n    = B->i[i+1] - B->i[i];
1000         idx  = B->j + B->i[i] + shift;
1001         v    = B->a + B->i[i] + shift;
1002         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1003         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1004       }
1005     }
1006   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1007     if (flag & SOR_ZERO_INITIAL_GUESS) {
1008       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1009       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1010       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1011       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1012       PetscFunctionReturn(0);
1013     }
1014     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1015     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1016     while (its--) {
1017       for ( i=0; i<m; i++ ) {
1018         n    = A->i[i+1] - A->i[i];
1019 	PLogFlops(4*n+3);
1020         idx  = A->j + A->i[i] + shift;
1021         v    = A->a + A->i[i] + shift;
1022         sum  = b[i];
1023         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1024         d    = fshift + A->a[diag[i]+shift];
1025         n    = B->i[i+1] - B->i[i];
1026         idx  = B->j + B->i[i] + shift;
1027         v    = B->a + B->i[i] + shift;
1028         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1029         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1030       }
1031     }
1032   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1033     if (flag & SOR_ZERO_INITIAL_GUESS) {
1034       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1035       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1036       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1037       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1038       PetscFunctionReturn(0);
1039     }
1040     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1042     while (its--) {
1043       for ( i=m-1; i>-1; i-- ) {
1044         n    = A->i[i+1] - A->i[i];
1045 	PLogFlops(4*n+3);
1046         idx  = A->j + A->i[i] + shift;
1047         v    = A->a + A->i[i] + shift;
1048         sum  = b[i];
1049         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1050         d    = fshift + A->a[diag[i]+shift];
1051         n    = B->i[i+1] - B->i[i];
1052         idx  = B->j + B->i[i] + shift;
1053         v    = B->a + B->i[i] + shift;
1054         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1055         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1056       }
1057     }
1058   } else {
1059     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1060   }
1061   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1062   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1063   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNC__
1068 #define __FUNC__ "MatGetInfo_MPIAIJ"
1069 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1070 {
1071   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1072   Mat        A = mat->A, B = mat->B;
1073   int        ierr;
1074   double     isend[5], irecv[5];
1075 
1076   PetscFunctionBegin;
1077   info->block_size     = 1.0;
1078   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1079   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1080   isend[3] = info->memory;  isend[4] = info->mallocs;
1081   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1082   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1083   isend[3] += info->memory;  isend[4] += info->mallocs;
1084   if (flag == MAT_LOCAL) {
1085     info->nz_used      = isend[0];
1086     info->nz_allocated = isend[1];
1087     info->nz_unneeded  = isend[2];
1088     info->memory       = isend[3];
1089     info->mallocs      = isend[4];
1090   } else if (flag == MAT_GLOBAL_MAX) {
1091     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1092     info->nz_used      = irecv[0];
1093     info->nz_allocated = irecv[1];
1094     info->nz_unneeded  = irecv[2];
1095     info->memory       = irecv[3];
1096     info->mallocs      = irecv[4];
1097   } else if (flag == MAT_GLOBAL_SUM) {
1098     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1099     info->nz_used      = irecv[0];
1100     info->nz_allocated = irecv[1];
1101     info->nz_unneeded  = irecv[2];
1102     info->memory       = irecv[3];
1103     info->mallocs      = irecv[4];
1104   }
1105   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1106   info->fill_ratio_needed = 0;
1107   info->factor_mallocs    = 0;
1108   info->rows_global       = (double)mat->M;
1109   info->columns_global    = (double)mat->N;
1110   info->rows_local        = (double)mat->m;
1111   info->columns_local     = (double)mat->N;
1112 
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNC__
1117 #define __FUNC__ "MatSetOption_MPIAIJ"
1118 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1119 {
1120   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1121 
1122   PetscFunctionBegin;
1123   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1124       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1125       op == MAT_COLUMNS_UNSORTED ||
1126       op == MAT_COLUMNS_SORTED ||
1127       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1128       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1129         MatSetOption(a->A,op);
1130         MatSetOption(a->B,op);
1131   } else if (op == MAT_ROW_ORIENTED) {
1132     a->roworiented = 1;
1133     MatSetOption(a->A,op);
1134     MatSetOption(a->B,op);
1135   } else if (op == MAT_ROWS_SORTED ||
1136              op == MAT_ROWS_UNSORTED ||
1137              op == MAT_SYMMETRIC ||
1138              op == MAT_STRUCTURALLY_SYMMETRIC ||
1139              op == MAT_YES_NEW_DIAGONALS)
1140     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1141   else if (op == MAT_COLUMN_ORIENTED) {
1142     a->roworiented = 0;
1143     MatSetOption(a->A,op);
1144     MatSetOption(a->B,op);
1145   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1146     a->donotstash = 1;
1147   } else if (op == MAT_NO_NEW_DIAGONALS){
1148     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1149   } else {
1150     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNC__
1156 #define __FUNC__ "MatGetSize_MPIAIJ"
1157 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1158 {
1159   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1160 
1161   PetscFunctionBegin;
1162   if (m) *m = mat->M;
1163   if (n) *n = mat->N;
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNC__
1168 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1169 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1170 {
1171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1172 
1173   PetscFunctionBegin;
1174   if (m) *m = mat->m;
1175   if (n) *n = mat->n;
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNC__
1180 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1181 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1182 {
1183   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1184 
1185   PetscFunctionBegin;
1186   *m = mat->rstart; *n = mat->rend;
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1191 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1192 
1193 #undef __FUNC__
1194 #define __FUNC__ "MatGetRow_MPIAIJ"
1195 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1196 {
1197   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1198   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1199   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1200   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1201   int        *cmap, *idx_p;
1202 
1203   PetscFunctionBegin;
1204   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1205   mat->getrowactive = PETSC_TRUE;
1206 
1207   if (!mat->rowvalues && (idx || v)) {
1208     /*
1209         allocate enough space to hold information from the longest row.
1210     */
1211     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1212     int     max = 1,m = mat->m,tmp;
1213     for ( i=0; i<m; i++ ) {
1214       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1215       if (max < tmp) { max = tmp; }
1216     }
1217     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1218     mat->rowindices = (int *) (mat->rowvalues + max);
1219   }
1220 
1221   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1222   lrow = row - rstart;
1223 
1224   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1225   if (!v)   {pvA = 0; pvB = 0;}
1226   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1227   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1228   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1229   nztot = nzA + nzB;
1230 
1231   cmap  = mat->garray;
1232   if (v  || idx) {
1233     if (nztot) {
1234       /* Sort by increasing column numbers, assuming A and B already sorted */
1235       int imark = -1;
1236       if (v) {
1237         *v = v_p = mat->rowvalues;
1238         for ( i=0; i<nzB; i++ ) {
1239           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1240           else break;
1241         }
1242         imark = i;
1243         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1244         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1245       }
1246       if (idx) {
1247         *idx = idx_p = mat->rowindices;
1248         if (imark > -1) {
1249           for ( i=0; i<imark; i++ ) {
1250             idx_p[i] = cmap[cworkB[i]];
1251           }
1252         } else {
1253           for ( i=0; i<nzB; i++ ) {
1254             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1255             else break;
1256           }
1257           imark = i;
1258         }
1259         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1260         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1261       }
1262     } else {
1263       if (idx) *idx = 0;
1264       if (v)   *v   = 0;
1265     }
1266   }
1267   *nz = nztot;
1268   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1269   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNC__
1274 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1275 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1276 {
1277   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1278 
1279   PetscFunctionBegin;
1280   if (aij->getrowactive == PETSC_FALSE) {
1281     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1282   }
1283   aij->getrowactive = PETSC_FALSE;
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNC__
1288 #define __FUNC__ "MatNorm_MPIAIJ"
1289 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1290 {
1291   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1292   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1293   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1294   double     sum = 0.0;
1295   Scalar     *v;
1296 
1297   PetscFunctionBegin;
1298   if (aij->size == 1) {
1299     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1300   } else {
1301     if (type == NORM_FROBENIUS) {
1302       v = amat->a;
1303       for (i=0; i<amat->nz; i++ ) {
1304 #if defined(USE_PETSC_COMPLEX)
1305         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1306 #else
1307         sum += (*v)*(*v); v++;
1308 #endif
1309       }
1310       v = bmat->a;
1311       for (i=0; i<bmat->nz; i++ ) {
1312 #if defined(USE_PETSC_COMPLEX)
1313         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1314 #else
1315         sum += (*v)*(*v); v++;
1316 #endif
1317       }
1318       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1319       *norm = sqrt(*norm);
1320     } else if (type == NORM_1) { /* max column norm */
1321       double *tmp, *tmp2;
1322       int    *jj, *garray = aij->garray;
1323       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1324       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1325       PetscMemzero(tmp,aij->N*sizeof(double));
1326       *norm = 0.0;
1327       v = amat->a; jj = amat->j;
1328       for ( j=0; j<amat->nz; j++ ) {
1329         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1330       }
1331       v = bmat->a; jj = bmat->j;
1332       for ( j=0; j<bmat->nz; j++ ) {
1333         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1334       }
1335       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1336       for ( j=0; j<aij->N; j++ ) {
1337         if (tmp2[j] > *norm) *norm = tmp2[j];
1338       }
1339       PetscFree(tmp); PetscFree(tmp2);
1340     } else if (type == NORM_INFINITY) { /* max row norm */
1341       double ntemp = 0.0;
1342       for ( j=0; j<amat->m; j++ ) {
1343         v = amat->a + amat->i[j] + shift;
1344         sum = 0.0;
1345         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1346           sum += PetscAbsScalar(*v); v++;
1347         }
1348         v = bmat->a + bmat->i[j] + shift;
1349         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1350           sum += PetscAbsScalar(*v); v++;
1351         }
1352         if (sum > ntemp) ntemp = sum;
1353       }
1354       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1355     } else {
1356       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1357     }
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNC__
1363 #define __FUNC__ "MatTranspose_MPIAIJ"
1364 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1365 {
1366   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1367   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1368   int        ierr,shift = Aloc->indexshift;
1369   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1370   Mat        B;
1371   Scalar     *array;
1372 
1373   PetscFunctionBegin;
1374   if (matout == PETSC_NULL && M != N) {
1375     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1376   }
1377 
1378   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1379 
1380   /* copy over the A part */
1381   Aloc = (Mat_SeqAIJ*) a->A->data;
1382   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1383   row = a->rstart;
1384   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1385   for ( i=0; i<m; i++ ) {
1386     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1387     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1388   }
1389   aj = Aloc->j;
1390   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1391 
1392   /* copy over the B part */
1393   Aloc = (Mat_SeqAIJ*) a->B->data;
1394   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1395   row = a->rstart;
1396   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1397   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1398   for ( i=0; i<m; i++ ) {
1399     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1400     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1401   }
1402   PetscFree(ct);
1403   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1404   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1405   if (matout != PETSC_NULL) {
1406     *matout = B;
1407   } else {
1408     PetscOps       *Abops;
1409     struct _MatOps *Aops;
1410 
1411     /* This isn't really an in-place transpose .... but free data structures from a */
1412     PetscFree(a->rowners);
1413     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1414     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1415     if (a->colmap) PetscFree(a->colmap);
1416     if (a->garray) PetscFree(a->garray);
1417     if (a->lvec) VecDestroy(a->lvec);
1418     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1419     PetscFree(a);
1420 
1421     /*
1422        This is horrible, horrible code. We need to keep the
1423       A pointers for the bops and ops but copy everything
1424       else from C.
1425     */
1426     Abops = A->bops;
1427     Aops  = A->ops;
1428     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1429     A->bops = Abops;
1430     A->ops  = Aops;
1431     PetscHeaderDestroy(B);
1432   }
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNC__
1437 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1438 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1439 {
1440   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1441   Mat a = aij->A, b = aij->B;
1442   int ierr,s1,s2,s3;
1443 
1444   PetscFunctionBegin;
1445   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1446   if (rr) {
1447     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1448     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1449     /* Overlap communication with computation. */
1450     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1451   }
1452   if (ll) {
1453     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1454     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1455     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1456   }
1457   /* scale  the diagonal block */
1458   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1459 
1460   if (rr) {
1461     /* Do a scatter end and then right scale the off-diagonal block */
1462     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1463     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1464   }
1465 
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 
1470 extern int MatPrintHelp_SeqAIJ(Mat);
1471 #undef __FUNC__
1472 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1473 int MatPrintHelp_MPIAIJ(Mat A)
1474 {
1475   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1476   int        ierr;
1477 
1478   PetscFunctionBegin;
1479   if (!a->rank) {
1480     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNC__
1486 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1487 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1488 {
1489   PetscFunctionBegin;
1490   *bs = 1;
1491   PetscFunctionReturn(0);
1492 }
1493 #undef __FUNC__
1494 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1495 int MatSetUnfactored_MPIAIJ(Mat A)
1496 {
1497   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1498   int        ierr;
1499 
1500   PetscFunctionBegin;
1501   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNC__
1506 #define __FUNC__ "MatEqual_MPIAIJ"
1507 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1508 {
1509   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1510   Mat        a, b, c, d;
1511   PetscTruth flg;
1512   int        ierr;
1513 
1514   PetscFunctionBegin;
1515   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1516   a = matA->A; b = matA->B;
1517   c = matB->A; d = matB->B;
1518 
1519   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1520   if (flg == PETSC_TRUE) {
1521     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1522   }
1523   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNC__
1528 #define __FUNC__ "MatCopy_MPIAIJ"
1529 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1530 {
1531   int        ierr;
1532   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1533   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1534 
1535   PetscFunctionBegin;
1536   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1537     /* because of the column compression in the off-processor part of the matrix a->B,
1538        the number of columns in a->B and b->B may be different, hence we cannot call
1539        the MatCopy() directly on the two parts. If need be, we can provide a more
1540        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1541        then copying the submatrices */
1542     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1543   } else {
1544     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1545     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1551 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1552 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1553 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1554 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1555 
1556 /* -------------------------------------------------------------------*/
1557 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1558        MatGetRow_MPIAIJ,
1559        MatRestoreRow_MPIAIJ,
1560        MatMult_MPIAIJ,
1561        MatMultAdd_MPIAIJ,
1562        MatMultTrans_MPIAIJ,
1563        MatMultTransAdd_MPIAIJ,
1564        0,
1565        0,
1566        0,
1567        0,
1568        0,
1569        0,
1570        MatRelax_MPIAIJ,
1571        MatTranspose_MPIAIJ,
1572        MatGetInfo_MPIAIJ,
1573        MatEqual_MPIAIJ,
1574        MatGetDiagonal_MPIAIJ,
1575        MatDiagonalScale_MPIAIJ,
1576        MatNorm_MPIAIJ,
1577        MatAssemblyBegin_MPIAIJ,
1578        MatAssemblyEnd_MPIAIJ,
1579        0,
1580        MatSetOption_MPIAIJ,
1581        MatZeroEntries_MPIAIJ,
1582        MatZeroRows_MPIAIJ,
1583        0,
1584        0,
1585        0,
1586        0,
1587        MatGetSize_MPIAIJ,
1588        MatGetLocalSize_MPIAIJ,
1589        MatGetOwnershipRange_MPIAIJ,
1590        0,
1591        0,
1592        0,
1593        0,
1594        MatDuplicate_MPIAIJ,
1595        0,
1596        0,
1597        0,
1598        0,
1599        0,
1600        MatGetSubMatrices_MPIAIJ,
1601        MatIncreaseOverlap_MPIAIJ,
1602        MatGetValues_MPIAIJ,
1603        MatCopy_MPIAIJ,
1604        MatPrintHelp_MPIAIJ,
1605        MatScale_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        MatGetBlockSize_MPIAIJ,
1610        0,
1611        0,
1612        0,
1613        0,
1614        MatFDColoringCreate_MPIAIJ,
1615        0,
1616        MatSetUnfactored_MPIAIJ,
1617        0,
1618        0,
1619        MatGetSubMatrix_MPIAIJ,
1620        0,
1621        0,
1622        MatGetMaps_Petsc};
1623 
1624 /* ----------------------------------------------------------------------------------------*/
1625 
1626 EXTERN_C_BEGIN
1627 #undef __FUNC__
1628 #define __FUNC__ "MatStoreValues_MPIAIJ"
1629 int MatStoreValues_MPIAIJ(Mat mat)
1630 {
1631   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1632   int        ierr;
1633 
1634   PetscFunctionBegin;
1635   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1636   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 EXTERN_C_END
1640 
1641 EXTERN_C_BEGIN
1642 #undef __FUNC__
1643 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1644 int MatRetrieveValues_MPIAIJ(Mat mat)
1645 {
1646   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1647   int        ierr;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1651   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 EXTERN_C_END
1655 
1656 #include "pc.h"
1657 EXTERN_C_BEGIN
1658 extern int PCSetUp_BJacobi_AIJ(PC);
1659 EXTERN_C_END
1660 
1661 #undef __FUNC__
1662 #define __FUNC__ "MatCreateMPIAIJ"
1663 /*@C
1664    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1665    (the default parallel PETSc format).  For good matrix assembly performance
1666    the user should preallocate the matrix storage by setting the parameters
1667    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1668    performance can be increased by more than a factor of 50.
1669 
1670    Collective on MPI_Comm
1671 
1672    Input Parameters:
1673 +  comm - MPI communicator
1674 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1675            This value should be the same as the local size used in creating the
1676            y vector for the matrix-vector product y = Ax.
1677 .  n - This value should be the same as the local size used in creating the
1678        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1679        calculated if N is given) For square matrices n is almost always m.
1680 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1681 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1682 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1683            (same value is used for all local rows)
1684 .  d_nnz - array containing the number of nonzeros in the various rows of the
1685            DIAGONAL portion of the local submatrix (possibly different for each row)
1686            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1687            The size of this array is equal to the number of local rows, i.e 'm'.
1688            You must leave room for the diagonal entry even if it is zero.
1689 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1690            submatrix (same value is used for all local rows).
1691 -  o_nnz - array containing the number of nonzeros in the various rows of the
1692            OFF-DIAGONAL portion of the local submatrix (possibly different for
1693            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1694            structure. The size of this array is equal to the number
1695            of local rows, i.e 'm'.
1696 
1697    Output Parameter:
1698 .  A - the matrix
1699 
1700    Notes:
1701    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1702    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1703    storage requirements for this matrix.
1704 
1705    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1706    processor than it must be used on all processors that share the object for
1707    that argument.
1708 
1709    The AIJ format (also called the Yale sparse matrix format or
1710    compressed row storage), is fully compatible with standard Fortran 77
1711    storage.  That is, the stored row and column indices can begin at
1712    either one (as in Fortran) or zero.  See the users manual for details.
1713 
1714    The user MUST specify either the local or global matrix dimensions
1715    (possibly both).
1716 
1717    The parallel matrix is partitioned such that the first m0 rows belong to
1718    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1719    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1720 
1721    The DIAGONAL portion of the local submatrix of a processor can be defined
1722    as the submatrix which is obtained by extraction the part corresponding
1723    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1724    first row that belongs to the processor, and r2 is the last row belonging
1725    to the this processor. This is a square mxm matrix. The remaining portion
1726    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1727 
1728    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1729 
1730    By default, this format uses inodes (identical nodes) when possible.
1731    We search for consecutive rows with the same nonzero structure, thereby
1732    reusing matrix information to achieve increased efficiency.
1733 
1734    Options Database Keys:
1735 +  -mat_aij_no_inode  - Do not use inodes
1736 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1737 -  -mat_aij_oneindex - Internally use indexing starting at 1
1738         rather than 0.  Note that when calling MatSetValues(),
1739         the user still MUST index entries starting at 0!
1740 .   -mat_mpi - use the parallel matrix data structures even on one processor
1741                (defaults to using SeqBAIJ format on one processor)
1742 .   -mat_mpi - use the parallel matrix data structures even on one processor
1743                (defaults to using SeqAIJ format on one processor)
1744 
1745 
1746    Example usage:
1747 
1748    Consider the following 8x8 matrix with 34 non-zero values, that is
1749    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1750    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1751    as follows:
1752 
1753 .vb
1754             1  2  0  |  0  3  0  |  0  4
1755     Proc0   0  5  6  |  7  0  0  |  8  0
1756             9  0 10  | 11  0  0  | 12  0
1757     -------------------------------------
1758            13  0 14  | 15 16 17  |  0  0
1759     Proc1   0 18  0  | 19 20 21  |  0  0
1760             0  0  0  | 22 23  0  | 24  0
1761     -------------------------------------
1762     Proc2  25 26 27  |  0  0 28  | 29  0
1763            30  0  0  | 31 32 33  |  0 34
1764 .ve
1765 
1766    This can be represented as a collection of submatrices as:
1767 
1768 .vb
1769       A B C
1770       D E F
1771       G H I
1772 .ve
1773 
1774    Where the submatrices A,B,C are owned by proc0, D,E,F are
1775    owned by proc1, G,H,I are owned by proc2.
1776 
1777    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1778    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1779    The 'M','N' parameters are 8,8, and have the same values on all procs.
1780 
1781    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1782    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1783    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1784    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1785    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1786    matrix, ans [DF] as another SeqAIJ matrix.
1787 
1788    When d_nz, o_nz parameters are specified, d_nz storage elements are
1789    allocated for every row of the local diagonal submatrix, and o_nz
1790    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1791    One way to choose d_nz and o_nz is to use the max nonzerors per local
1792    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1793    In this case, the values of d_nz,o_nz are:
1794 .vb
1795      proc0 : dnz = 2, o_nz = 2
1796      proc1 : dnz = 3, o_nz = 2
1797      proc2 : dnz = 1, o_nz = 4
1798 .ve
1799    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1800    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1801    for proc3. i.e we are using 12+15+10=37 storage locations to store
1802    34 values.
1803 
1804    When d_nnz, o_nnz parameters are specified, the storage is specified
1805    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1806    In the above case the values for d_nnz,o_nnz are:
1807 .vb
1808      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1809      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1810      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1811 .ve
1812    Here the space allocated is sum of all the above values i.e 34, and
1813    hence pre-allocation is perfect.
1814 
1815 .keywords: matrix, aij, compressed row, sparse, parallel
1816 
1817 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1818 @*/
1819 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1820 {
1821   Mat          B;
1822   Mat_MPIAIJ   *b;
1823   int          ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0;
1824 
1825   PetscFunctionBegin;
1826   MPI_Comm_size(comm,&size);
1827   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1828   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1829   if (!flag1 && !flag2 && size == 1) {
1830     if (M == PETSC_DECIDE) M = m;
1831     if (N == PETSC_DECIDE) N = n;
1832     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1833     PetscFunctionReturn(0);
1834   }
1835 
1836   *A = 0;
1837   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1838   PLogObjectCreate(B);
1839   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1840   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1841   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1842   B->ops->destroy    = MatDestroy_MPIAIJ;
1843   B->ops->view       = MatView_MPIAIJ;
1844   B->factor          = 0;
1845   B->assembled       = PETSC_FALSE;
1846   B->mapping         = 0;
1847 
1848   B->insertmode      = NOT_SET_VALUES;
1849   b->size            = size;
1850   MPI_Comm_rank(comm,&b->rank);
1851 
1852   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1853     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1854   }
1855 
1856   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1857     work[0] = m; work[1] = n;
1858     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1859     if (M == PETSC_DECIDE) M = sum[0];
1860     if (N == PETSC_DECIDE) N = sum[1];
1861   }
1862   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1863   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1864   b->m = m; B->m = m;
1865   b->n = n; B->n = n;
1866   b->N = N; B->N = N;
1867   b->M = M; B->M = M;
1868 
1869   /* the information in the maps duplicates the information computed below, eventually
1870      we should remove the duplicate information that is not contained in the maps */
1871   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1872   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1873 
1874   /* build local table of row and column ownerships */
1875   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1876   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1877   b->cowners = b->rowners + b->size + 2;
1878   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1879   b->rowners[0] = 0;
1880   for ( i=2; i<=b->size; i++ ) {
1881     b->rowners[i] += b->rowners[i-1];
1882   }
1883   b->rstart = b->rowners[b->rank];
1884   b->rend   = b->rowners[b->rank+1];
1885   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1886   b->cowners[0] = 0;
1887   for ( i=2; i<=b->size; i++ ) {
1888     b->cowners[i] += b->cowners[i-1];
1889   }
1890   b->cstart = b->cowners[b->rank];
1891   b->cend   = b->cowners[b->rank+1];
1892 
1893   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1894   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1895   PLogObjectParent(B,b->A);
1896   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1897   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1898   PLogObjectParent(B,b->B);
1899 
1900   /* build cache for off array entries formed */
1901   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1902   b->donotstash  = 0;
1903   b->colmap      = 0;
1904   b->garray      = 0;
1905   b->roworiented = 1;
1906 
1907   /* stuff used for matrix vector multiply */
1908   b->lvec      = 0;
1909   b->Mvctx     = 0;
1910 
1911   /* stuff for MatGetRow() */
1912   b->rowindices   = 0;
1913   b->rowvalues    = 0;
1914   b->getrowactive = PETSC_FALSE;
1915 
1916   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1917                                      "MatStoreValues_MPIAIJ",
1918                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1919   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1920                                      "MatRetrieveValues_MPIAIJ",
1921                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1922   ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C",
1923                                      "PCSetUp_BJacobi_AIJ",
1924                                      (void*)PCSetUp_BJacobi_AIJ);CHKERRQ(ierr);
1925   *A = B;
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNC__
1930 #define __FUNC__ "MatDuplicate_MPIAIJ"
1931 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1932 {
1933   Mat        mat;
1934   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1935   int        ierr, len=0, flg;
1936 
1937   PetscFunctionBegin;
1938   *newmat       = 0;
1939   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1940   PLogObjectCreate(mat);
1941   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1942   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1943   mat->ops->destroy    = MatDestroy_MPIAIJ;
1944   mat->ops->view       = MatView_MPIAIJ;
1945   mat->factor     = matin->factor;
1946   mat->assembled  = PETSC_TRUE;
1947 
1948   a->m = mat->m   = oldmat->m;
1949   a->n = mat->n   = oldmat->n;
1950   a->M = mat->M   = oldmat->M;
1951   a->N = mat->N   = oldmat->N;
1952 
1953   a->rstart       = oldmat->rstart;
1954   a->rend         = oldmat->rend;
1955   a->cstart       = oldmat->cstart;
1956   a->cend         = oldmat->cend;
1957   a->size         = oldmat->size;
1958   a->rank         = oldmat->rank;
1959   mat->insertmode = NOT_SET_VALUES;
1960   a->rowvalues    = 0;
1961   a->getrowactive = PETSC_FALSE;
1962 
1963   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1964   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1965   a->cowners = a->rowners + a->size + 2;
1966   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1967   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1968   if (oldmat->colmap) {
1969     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1970     PLogObjectMemory(mat,(a->N)*sizeof(int));
1971     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1972   } else a->colmap = 0;
1973   if (oldmat->garray) {
1974     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1975     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1976     PLogObjectMemory(mat,len*sizeof(int));
1977     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1978   } else a->garray = 0;
1979 
1980   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1981   PLogObjectParent(mat,a->lvec);
1982   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1983   PLogObjectParent(mat,a->Mvctx);
1984   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1985   PLogObjectParent(mat,a->A);
1986   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
1987   PLogObjectParent(mat,a->B);
1988   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1989   if (flg) {
1990     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1991   }
1992   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1993   *newmat = mat;
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #include "sys.h"
1998 
1999 #undef __FUNC__
2000 #define __FUNC__ "MatLoad_MPIAIJ"
2001 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
2002 {
2003   Mat          A;
2004   Scalar       *vals,*svals;
2005   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2006   MPI_Status   status;
2007   int          i, nz, ierr, j,rstart, rend, fd;
2008   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
2009   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
2010   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
2011 
2012   PetscFunctionBegin;
2013   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2014   if (!rank) {
2015     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2016     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2017     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2018     if (header[3] < 0) {
2019       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
2020     }
2021   }
2022 
2023   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2024   M = header[1]; N = header[2];
2025   /* determine ownership of all rows */
2026   m = M/size + ((M % size) > rank);
2027   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
2028   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2029   rowners[0] = 0;
2030   for ( i=2; i<=size; i++ ) {
2031     rowners[i] += rowners[i-1];
2032   }
2033   rstart = rowners[rank];
2034   rend   = rowners[rank+1];
2035 
2036   /* distribute row lengths to all processors */
2037   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
2038   offlens = ourlens + (rend-rstart);
2039   if (!rank) {
2040     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
2041     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2042     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2043     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2044     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2045     PetscFree(sndcounts);
2046   } else {
2047     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2048   }
2049 
2050   if (!rank) {
2051     /* calculate the number of nonzeros on each processor */
2052     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2053     PetscMemzero(procsnz,size*sizeof(int));
2054     for ( i=0; i<size; i++ ) {
2055       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2056         procsnz[i] += rowlengths[j];
2057       }
2058     }
2059     PetscFree(rowlengths);
2060 
2061     /* determine max buffer needed and allocate it */
2062     maxnz = 0;
2063     for ( i=0; i<size; i++ ) {
2064       maxnz = PetscMax(maxnz,procsnz[i]);
2065     }
2066     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2067 
2068     /* read in my part of the matrix column indices  */
2069     nz     = procsnz[0];
2070     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2071     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2072 
2073     /* read in every one elses and ship off */
2074     for ( i=1; i<size; i++ ) {
2075       nz   = procsnz[i];
2076       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2077       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2078     }
2079     PetscFree(cols);
2080   } else {
2081     /* determine buffer space needed for message */
2082     nz = 0;
2083     for ( i=0; i<m; i++ ) {
2084       nz += ourlens[i];
2085     }
2086     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2087 
2088     /* receive message of column indices*/
2089     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2090     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2091     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2092   }
2093 
2094   /* determine column ownership if matrix is not square */
2095   if (N != M) {
2096     n      = N/size + ((N % size) > rank);
2097     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2098     cstart = cend - n;
2099   } else {
2100     cstart = rstart;
2101     cend   = rend;
2102     n      = cend - cstart;
2103   }
2104 
2105   /* loop over local rows, determining number of off diagonal entries */
2106   PetscMemzero(offlens,m*sizeof(int));
2107   jj = 0;
2108   for ( i=0; i<m; i++ ) {
2109     for ( j=0; j<ourlens[i]; j++ ) {
2110       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2111       jj++;
2112     }
2113   }
2114 
2115   /* create our matrix */
2116   for ( i=0; i<m; i++ ) {
2117     ourlens[i] -= offlens[i];
2118   }
2119   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2120   A = *newmat;
2121   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
2122   for ( i=0; i<m; i++ ) {
2123     ourlens[i] += offlens[i];
2124   }
2125 
2126   if (!rank) {
2127     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2128 
2129     /* read in my part of the matrix numerical values  */
2130     nz   = procsnz[0];
2131     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2132 
2133     /* insert into matrix */
2134     jj      = rstart;
2135     smycols = mycols;
2136     svals   = vals;
2137     for ( i=0; i<m; i++ ) {
2138       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2139       smycols += ourlens[i];
2140       svals   += ourlens[i];
2141       jj++;
2142     }
2143 
2144     /* read in other processors and ship out */
2145     for ( i=1; i<size; i++ ) {
2146       nz   = procsnz[i];
2147       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2148       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2149     }
2150     PetscFree(procsnz);
2151   } else {
2152     /* receive numeric values */
2153     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2154 
2155     /* receive message of values*/
2156     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2157     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2158     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2159 
2160     /* insert into matrix */
2161     jj      = rstart;
2162     smycols = mycols;
2163     svals   = vals;
2164     for ( i=0; i<m; i++ ) {
2165       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2166       smycols += ourlens[i];
2167       svals   += ourlens[i];
2168       jj++;
2169     }
2170   }
2171   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2172 
2173   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2174   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 #undef __FUNC__
2179 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2180 /*
2181     Not great since it makes two copies of the submatrix, first an SeqAIJ
2182   in local and then by concatenating the local matrices the end result.
2183   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2184 */
2185 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
2186 {
2187   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2188   Mat        *local,M, Mreuse;
2189   Scalar     *vwork,*aa;
2190   MPI_Comm   comm = mat->comm;
2191   Mat_SeqAIJ *aij;
2192   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2193 
2194   PetscFunctionBegin;
2195   MPI_Comm_rank(comm,&rank);
2196   MPI_Comm_size(comm,&size);
2197 
2198   if (call ==  MAT_REUSE_MATRIX) {
2199     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2200     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2201     local = &Mreuse;
2202     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2203   } else {
2204     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2205     Mreuse = *local;
2206     PetscFree(local);
2207   }
2208 
2209   /*
2210       m - number of local rows
2211       n - number of columns (same on all processors)
2212       rstart - first row in new global matrix generated
2213   */
2214   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2215   if (call == MAT_INITIAL_MATRIX) {
2216     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2217     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2218     ii  = aij->i;
2219     jj  = aij->j;
2220 
2221     /*
2222         Determine the number of non-zeros in the diagonal and off-diagonal
2223         portions of the matrix in order to do correct preallocation
2224     */
2225 
2226     /* first get start and end of "diagonal" columns */
2227     if (csize == PETSC_DECIDE) {
2228       nlocal = n/size + ((n % size) > rank);
2229     } else {
2230       nlocal = csize;
2231     }
2232     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2233     rstart = rend - nlocal;
2234     if (rank == size - 1 && rend != n) {
2235       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2236     }
2237 
2238     /* next, compute all the lengths */
2239     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2240     olens = dlens + m;
2241     for ( i=0; i<m; i++ ) {
2242       jend = ii[i+1] - ii[i];
2243       olen = 0;
2244       dlen = 0;
2245       for ( j=0; j<jend; j++ ) {
2246         if ( *jj < rstart || *jj >= rend) olen++;
2247         else dlen++;
2248         jj++;
2249       }
2250       olens[i] = olen;
2251       dlens[i] = dlen;
2252     }
2253     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2254     PetscFree(dlens);
2255   } else {
2256     int ml,nl;
2257 
2258     M = *newmat;
2259     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2260     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2261     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2262     /*
2263          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2264        rather than the slower MatSetValues().
2265     */
2266     M->was_assembled = PETSC_TRUE;
2267     M->assembled     = PETSC_FALSE;
2268   }
2269   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2270   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2271   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2272   ii  = aij->i;
2273   jj  = aij->j;
2274   aa  = aij->a;
2275   for (i=0; i<m; i++) {
2276     row   = rstart + i;
2277     nz    = ii[i+1] - ii[i];
2278     cwork = jj;     jj += nz;
2279     vwork = aa;     aa += nz;
2280     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2281   }
2282 
2283   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2284   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2285   *newmat = M;
2286 
2287   /* save submatrix used in processor for next request */
2288   if (call ==  MAT_INITIAL_MATRIX) {
2289     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2290     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2291   }
2292 
2293   PetscFunctionReturn(0);
2294 }
2295