xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e2d53e46d4ac3432676e6a92874d890db252935a)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.277 1999/02/11 20:16:32 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 #undef __FUNC__
15 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 #if defined (USE_CTABLE)
22   int        ierr;
23 #endif
24 
25   PetscFunctionBegin;
26 #if defined (USE_CTABLE)
27   ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr);
28   for ( i=0; i<n; i++ ){
29     ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr);
30   }
31 #else
32   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
33   PLogObjectMemory(mat,aij->N*sizeof(int));
34   PetscMemzero(aij->colmap,aij->N*sizeof(int));
35   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
36 #endif
37   PetscFunctionReturn(0);
38 }
39 
40 extern int DisAssemble_MPIAIJ(Mat);
41 
42 #define CHUNKSIZE   15
43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
44 { \
45  \
46     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
47     rmax = aimax[row]; nrow = ailen[row];  \
48     col1 = col - shift; \
49      \
50     low = 0; high = nrow; \
51     while (high-low > 5) { \
52       t = (low+high)/2; \
53       if (rp[t] > col) high = t; \
54       else             low  = t; \
55     } \
56       for ( _i=0; _i<nrow; _i++ ) { \
57         if (rp[_i] > col1) break; \
58         if (rp[_i] == col1) { \
59           if (addv == ADD_VALUES) ap[_i] += value;   \
60           else                  ap[_i] = value; \
61           goto a_noinsert; \
62         } \
63       }  \
64       if (nonew == 1) goto a_noinsert; \
65       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
66       if (nrow >= rmax) { \
67         /* there is no extra room in row, therefore enlarge */ \
68         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
69         Scalar *new_a; \
70  \
71         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
72  \
73         /* malloc new storage space */ \
74         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
75         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
76         new_j   = (int *) (new_a + new_nz); \
77         new_i   = new_j + new_nz; \
78  \
79         /* copy over old data into new slots */ \
80         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
81         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
82         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
83         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
84         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
85                                                            len*sizeof(int)); \
86         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
87         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
88                                                            len*sizeof(Scalar));  \
89         /* free up old matrix storage */ \
90  \
91         PetscFree(a->a);  \
92         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
93         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
94         a->singlemalloc = 1; \
95  \
96         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
97         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
98         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
99         a->maxnz += CHUNKSIZE; \
100         a->reallocs++; \
101       } \
102       N = nrow++ - 1; a->nz++; \
103       /* shift up all the later entries in this row */ \
104       for ( ii=N; ii>=_i; ii-- ) { \
105         rp[ii+1] = rp[ii]; \
106         ap[ii+1] = ap[ii]; \
107       } \
108       rp[_i] = col1;  \
109       ap[_i] = value;  \
110       a_noinsert: ; \
111       ailen[row] = nrow; \
112 }
113 
114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
115 { \
116  \
117     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
118     rmax = bimax[row]; nrow = bilen[row];  \
119     col1 = col - shift; \
120      \
121     low = 0; high = nrow; \
122     while (high-low > 5) { \
123       t = (low+high)/2; \
124       if (rp[t] > col) high = t; \
125       else             low  = t; \
126     } \
127        for ( _i=0; _i<nrow; _i++ ) { \
128         if (rp[_i] > col1) break; \
129         if (rp[_i] == col1) { \
130           if (addv == ADD_VALUES) ap[_i] += value;   \
131           else                  ap[_i] = value; \
132           goto b_noinsert; \
133         } \
134       }  \
135       if (nonew == 1) goto b_noinsert; \
136       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
137       if (nrow >= rmax) { \
138         /* there is no extra room in row, therefore enlarge */ \
139         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
140         Scalar *new_a; \
141  \
142         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
143  \
144         /* malloc new storage space */ \
145         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
146         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
147         new_j   = (int *) (new_a + new_nz); \
148         new_i   = new_j + new_nz; \
149  \
150         /* copy over old data into new slots */ \
151         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
152         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
153         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
154         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
155         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
156                                                            len*sizeof(int)); \
157         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
158         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
159                                                            len*sizeof(Scalar));  \
160         /* free up old matrix storage */ \
161  \
162         PetscFree(b->a);  \
163         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = 1; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for ( ii=N; ii>=_i; ii-- ) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
184 
185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
186 #undef __FUNC__
187 #define __FUNC__ "MatSetValues_MPIAIJ"
188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
189 {
190   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
191   Scalar     value;
192   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
193   int        cstart = aij->cstart, cend = aij->cend,row,col;
194   int        roworiented = aij->roworiented;
195 
196   /* Some Variables required in the macro */
197   Mat        A = aij->A;
198   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
199   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
200   Scalar     *aa = a->a;
201 
202   Mat        B = aij->B;
203   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
204   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
205   Scalar     *ba = b->a;
206 
207   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
208   int        nonew = a->nonew,shift = a->indexshift;
209   Scalar     *ap;
210 
211   PetscFunctionBegin;
212   for ( i=0; i<m; i++ ) {
213     if (im[i] < 0) continue;
214 #if defined(USE_PETSC_BOPT_g)
215     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
216 #endif
217     if (im[i] >= rstart && im[i] < rend) {
218       row = im[i] - rstart;
219       for ( j=0; j<n; j++ ) {
220         if (in[j] >= cstart && in[j] < cend){
221           col = in[j] - cstart;
222           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         }
226         else if (in[j] < 0) continue;
227 #if defined(USE_PETSC_BOPT_g)
228         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
229 #endif
230         else {
231           if (mat->was_assembled) {
232             if (!aij->colmap) {
233               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
234             }
235 #if defined (USE_CTABLE)
236 	    col = TableFind( aij->colmap, in[j] + 1 ) - 1;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ *) B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (roworiented && !aij->donotstash) {
257         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258       } else {
259         if (!aij->donotstash) {
260           row = im[i];
261           for ( j=0; j<n; j++ ) {
262             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
263           }
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNC__
272 #define __FUNC__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
276   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
277   int        cstart = aij->cstart, cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for ( i=0; i<m; i++ ) {
281     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
282     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for ( j=0; j<n; j++ ) {
286         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
287         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (USE_CTABLE)
296 	  col = TableFind( aij->colmap, idxn[j] + 1 ) - 1;
297 #else
298           col = aij->colmap[idxn[j]] - 1;
299 #endif
300           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301           else {
302             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
303           }
304         }
305       }
306     } else {
307       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNC__
314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316 {
317   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
318   MPI_Comm    comm = mat->comm;
319   int         size = aij->size, *owners = aij->rowners;
320   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
321   MPI_Request *send_waits,*recv_waits;
322   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
323   InsertMode  addv;
324   Scalar      *rvalues,*svalues;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     aij->svalues    = 0; aij->rvalues    = 0;
329     aij->nsends     = 0; aij->nrecvs     = 0;
330     aij->send_waits = 0; aij->recv_waits = 0;
331     aij->rmax       = 0;
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are either in INSERTMODE or ADDMODE */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   /*  first count number of contributors to each processor */
343   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
344   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
345   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
346   for ( i=0; i<aij->stash.n; i++ ) {
347     idx = aij->stash.idx[i];
348     for ( j=0; j<size; j++ ) {
349       if (idx >= owners[j] && idx < owners[j+1]) {
350         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
351       }
352     }
353   }
354   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
355 
356   /* inform other processors of number of messages and max length*/
357   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
358   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
359   nreceives = work[rank];
360   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
361   nmax = work[rank];
362   PetscFree(work);
363 
364   /* post receives:
365        1) each message will consist of ordered pairs
366      (global index,value) we store the global index as a double
367      to simplify the message passing.
368        2) since we don't know how long each individual message is we
369      allocate the largest needed buffer for each receive. Potentially
370      this is a lot of wasted space.
371 
372 
373        This could be done better.
374   */
375   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
376   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
377   for ( i=0; i<nreceives; i++ ) {
378     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
379               comm,recv_waits+i);CHKERRQ(ierr);
380   }
381 
382   /* do sends:
383       1) starts[i] gives the starting index in svalues for stuff going to
384          the ith processor
385   */
386   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
387   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
388   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
389   starts[0]  = 0;
390   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
391   for ( i=0; i<aij->stash.n; i++ ) {
392     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
393     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
394     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
395   }
396   PetscFree(owner);
397   starts[0] = 0;
398   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
399   count = 0;
400   for ( i=0; i<size; i++ ) {
401     if (procs[i]) {
402       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
403     }
404   }
405   PetscFree(starts);
406   PetscFree(nprocs);
407 
408   /* Free cache space */
409   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
410   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
411 
412   aij->svalues    = svalues;    aij->rvalues    = rvalues;
413   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
414   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
415   aij->rmax       = nmax;
416 
417   PetscFunctionReturn(0);
418 }
419 extern int MatSetUpMultiply_MPIAIJ(Mat);
420 
421 #undef __FUNC__
422 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
423 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
424 {
425   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
426   MPI_Status  *send_status,recv_status;
427   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
428   int         row,col,other_disassembled;
429   Scalar      *values,val;
430   InsertMode  addv = mat->insertmode;
431 
432   PetscFunctionBegin;
433 
434   /*  wait on receives */
435   while (count) {
436     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
437     /* unpack receives into our local space */
438     values = aij->rvalues + 3*imdex*aij->rmax;
439     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
440     n = n/3;
441     for ( i=0; i<n; i++ ) {
442       row = (int) PetscReal(values[3*i]) - aij->rstart;
443       col = (int) PetscReal(values[3*i+1]);
444       val = values[3*i+2];
445       if (col >= aij->cstart && col < aij->cend) {
446         col -= aij->cstart;
447         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
448       } else {
449         if (mat->was_assembled || mat->assembled) {
450           if (!aij->colmap) {
451             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
452           }
453 #if defined (USE_CTABLE)
454           col = TableFind( aij->colmap, col + 1 ) - 1;
455 #else
456           col = aij->colmap[col] - 1;
457 #endif
458           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
459             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460             col = (int) PetscReal(values[3*i+1]);
461           }
462         }
463         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
464       }
465     }
466     count--;
467   }
468   if (aij->recv_waits) PetscFree(aij->recv_waits);
469   if (aij->rvalues)    PetscFree(aij->rvalues);
470 
471   /* wait on sends */
472   if (aij->nsends) {
473     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
474     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
475     PetscFree(send_status);
476   }
477   if (aij->send_waits) PetscFree(aij->send_waits);
478   if (aij->svalues)    PetscFree(aij->svalues);
479 
480   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
481   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
482 
483   /* determine if any processor has disassembled, if so we must
484      also disassemble ourselfs, in order that we may reassemble. */
485   /*
486      if nonzero structure of submatrix B cannot change then we know that
487      no processor disassembled thus we can skip this stuff
488   */
489   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
490     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
491     if (mat->was_assembled && !other_disassembled) {
492       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
493     }
494   }
495 
496   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
497     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
498   }
499   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
500   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
501 
502   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNC__
507 #define __FUNC__ "MatZeroEntries_MPIAIJ"
508 int MatZeroEntries_MPIAIJ(Mat A)
509 {
510   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
511   int        ierr;
512 
513   PetscFunctionBegin;
514   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
515   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNC__
520 #define __FUNC__ "MatZeroRows_MPIAIJ"
521 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
522 {
523   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
524   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
525   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
526   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
527   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
528   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
529   MPI_Comm       comm = A->comm;
530   MPI_Request    *send_waits,*recv_waits;
531   MPI_Status     recv_status,*send_status;
532   IS             istmp;
533 
534   PetscFunctionBegin;
535   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
536   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
537 
538   /*  first count number of contributors to each processor */
539   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
540   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
541   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
542   for ( i=0; i<N; i++ ) {
543     idx = rows[i];
544     found = 0;
545     for ( j=0; j<size; j++ ) {
546       if (idx >= owners[j] && idx < owners[j+1]) {
547         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
548       }
549     }
550     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
551   }
552   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
553 
554   /* inform other processors of number of messages and max length*/
555   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
556   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
557   nrecvs = work[rank];
558   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
559   nmax = work[rank];
560   PetscFree(work);
561 
562   /* post receives:   */
563   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
564   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
565   for ( i=0; i<nrecvs; i++ ) {
566     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
567   }
568 
569   /* do sends:
570       1) starts[i] gives the starting index in svalues for stuff going to
571          the ith processor
572   */
573   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
574   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
575   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
576   starts[0] = 0;
577   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
578   for ( i=0; i<N; i++ ) {
579     svalues[starts[owner[i]]++] = rows[i];
580   }
581   ISRestoreIndices(is,&rows);
582 
583   starts[0] = 0;
584   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
585   count = 0;
586   for ( i=0; i<size; i++ ) {
587     if (procs[i]) {
588       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
589     }
590   }
591   PetscFree(starts);
592 
593   base = owners[rank];
594 
595   /*  wait on receives */
596   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
597   source = lens + nrecvs;
598   count  = nrecvs; slen = 0;
599   while (count) {
600     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
601     /* unpack receives into our local space */
602     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
603     source[imdex]  = recv_status.MPI_SOURCE;
604     lens[imdex]  = n;
605     slen += n;
606     count--;
607   }
608   PetscFree(recv_waits);
609 
610   /* move the data into the send scatter */
611   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
612   count = 0;
613   for ( i=0; i<nrecvs; i++ ) {
614     values = rvalues + i*nmax;
615     for ( j=0; j<lens[i]; j++ ) {
616       lrows[count++] = values[j] - base;
617     }
618   }
619   PetscFree(rvalues); PetscFree(lens);
620   PetscFree(owner); PetscFree(nprocs);
621 
622   /* actually zap the local rows */
623   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
624   PLogObjectParent(A,istmp);
625 
626   /*
627         Zero the required rows. If the "diagonal block" of the matrix
628      is square and the user wishes to set the diagonal we use seperate
629      code so that MatSetValues() is not called for each diagonal allocating
630      new memory, thus calling lots of mallocs and slowing things down.
631 
632        Contributed by: Mathew Knepley
633   */
634   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
635   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
636   if (diag && (l->A->M == l->A->N)) {
637     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
638   } else if (diag) {
639     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
640     for ( i = 0; i < slen; i++ ) {
641       row  = lrows[i] + rstart;
642       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
643     }
644     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
645     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
646   } else {
647     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
648   }
649   ierr = ISDestroy(istmp); CHKERRQ(ierr);
650   PetscFree(lrows);
651 
652   /* wait on sends */
653   if (nsends) {
654     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
655     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
656     PetscFree(send_status);
657   }
658   PetscFree(send_waits); PetscFree(svalues);
659 
660   PetscFunctionReturn(0);
661 }
662 
663 #undef __FUNC__
664 #define __FUNC__ "MatMult_MPIAIJ"
665 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
666 {
667   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
668   int        ierr,nt;
669 
670   PetscFunctionBegin;
671   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
672   if (nt != a->n) {
673     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
674   }
675   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
676   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
677   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
678   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNC__
683 #define __FUNC__ "MatMultAdd_MPIAIJ"
684 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
685 {
686   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
687   int        ierr;
688 
689   PetscFunctionBegin;
690   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
691   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
692   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
693   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNC__
698 #define __FUNC__ "MatMultTrans_MPIAIJ"
699 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
700 {
701   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
702   int        ierr;
703 
704   PetscFunctionBegin;
705   /* do nondiagonal part */
706   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
707   /* send it on its way */
708   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
709   /* do local part */
710   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
711   /* receive remote parts: note this assumes the values are not actually */
712   /* inserted in yy until the next line, which is true for my implementation*/
713   /* but is not perhaps always true. */
714   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNC__
719 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
720 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
721 {
722   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
723   int        ierr;
724 
725   PetscFunctionBegin;
726   /* do nondiagonal part */
727   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
728   /* send it on its way */
729   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
730   /* do local part */
731   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
732   /* receive remote parts: note this assumes the values are not actually */
733   /* inserted in yy until the next line, which is true for my implementation*/
734   /* but is not perhaps always true. */
735   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 /*
740   This only works correctly for square matrices where the subblock A->A is the
741    diagonal block
742 */
743 #undef __FUNC__
744 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
745 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
746 {
747   int        ierr;
748   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
749 
750   PetscFunctionBegin;
751   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
752   if (a->rstart != a->cstart || a->rend != a->cend) {
753     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
754   }
755   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNC__
760 #define __FUNC__ "MatScale_MPIAIJ"
761 int MatScale_MPIAIJ(Scalar *aa,Mat A)
762 {
763   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
764   int        ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
768   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 #undef __FUNC__
773 #define __FUNC__ "MatDestroy_MPIAIJ"
774 int MatDestroy_MPIAIJ(Mat mat)
775 {
776   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
777   int        ierr;
778 
779   PetscFunctionBegin;
780   if (--mat->refct > 0) PetscFunctionReturn(0);
781 
782   if (mat->mapping) {
783     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
784   }
785   if (mat->bmapping) {
786     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
787   }
788   if (mat->rmap) {
789     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
790   }
791   if (mat->cmap) {
792     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
793   }
794 #if defined(USE_PETSC_LOG)
795   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
796 #endif
797   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
798   PetscFree(aij->rowners);
799   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
800   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
801 #if defined (USE_CTABLE)
802   if (aij->colmap) TableDelete(aij->colmap);
803 #else
804   if (aij->colmap) PetscFree(aij->colmap);
805 #endif
806   if (aij->garray) PetscFree(aij->garray);
807   if (aij->lvec)   VecDestroy(aij->lvec);
808   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
809   if (aij->rowvalues) PetscFree(aij->rowvalues);
810   PetscFree(aij);
811   PLogObjectDestroy(mat);
812   PetscHeaderDestroy(mat);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNC__
817 #define __FUNC__ "MatView_MPIAIJ_Binary"
818 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
819 {
820   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
821   int         ierr;
822 
823   PetscFunctionBegin;
824   if (aij->size == 1) {
825     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
826   }
827   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNC__
832 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
833 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
834 {
835   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
836   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
837   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
838   int         size = aij->size;
839   FILE        *fd;
840   ViewerType  vtype;
841 
842   PetscFunctionBegin;
843   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
844   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
845     ierr = ViewerGetFormat(viewer,&format);
846     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
847       MatInfo info;
848       int     flg;
849       MPI_Comm_rank(mat->comm,&rank);
850       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
851       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
852       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
853       PetscSequentialPhaseBegin(mat->comm,1);
854       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
855          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
856       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
857          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
858       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
859       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
860       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
861       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
862       fflush(fd);
863       PetscSequentialPhaseEnd(mat->comm,1);
864       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
865       PetscFunctionReturn(0);
866     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
867       PetscFunctionReturn(0);
868     }
869   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
870     Draw       draw;
871     PetscTruth isnull;
872     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
873     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
874   }
875 
876   if (size == 1) {
877     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
878   } else {
879     /* assemble the entire matrix onto first processor. */
880     Mat         A;
881     Mat_SeqAIJ *Aloc;
882     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
883     Scalar      *a;
884 
885     if (!rank) {
886       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
887     } else {
888       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
889     }
890     PLogObjectParent(mat,A);
891 
892     /* copy over the A part */
893     Aloc = (Mat_SeqAIJ*) aij->A->data;
894     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
895     row = aij->rstart;
896     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
897     for ( i=0; i<m; i++ ) {
898       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
899       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
900     }
901     aj = Aloc->j;
902     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
903 
904     /* copy over the B part */
905     Aloc = (Mat_SeqAIJ*) aij->B->data;
906     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
907     row = aij->rstart;
908     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
909     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
910     for ( i=0; i<m; i++ ) {
911       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
912       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
913     }
914     PetscFree(ct);
915     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
916     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
917     /*
918        Everyone has to call to draw the matrix since the graphics waits are
919        synchronized across all processors that share the Draw object
920     */
921     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
922       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
923     }
924     ierr = MatDestroy(A); CHKERRQ(ierr);
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNC__
930 #define __FUNC__ "MatView_MPIAIJ"
931 int MatView_MPIAIJ(Mat mat,Viewer viewer)
932 {
933   int         ierr;
934   ViewerType  vtype;
935 
936   PetscFunctionBegin;
937   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
938   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
939       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
940     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
941   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
942     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
943   } else {
944     SETERRQ(1,1,"Viewer type not supported by PETSc object");
945   }
946   PetscFunctionReturn(0);
947 }
948 
949 /*
950     This has to provide several versions.
951 
952      2) a) use only local smoothing updating outer values only once.
953         b) local smoothing updating outer values each inner iteration
954      3) color updating out values betwen colors.
955 */
956 #undef __FUNC__
957 #define __FUNC__ "MatRelax_MPIAIJ"
958 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
959                            double fshift,int its,Vec xx)
960 {
961   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
962   Mat        AA = mat->A, BB = mat->B;
963   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
964   Scalar     *b,*x,*xs,*ls,d,*v,sum;
965   int        ierr,*idx, *diag;
966   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
967 
968   PetscFunctionBegin;
969   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
970   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
971   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
972   xs = x + shift; /* shift by one for index start of 1 */
973   ls = ls + shift;
974   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
975   diag = A->diag;
976   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
977     if (flag & SOR_ZERO_INITIAL_GUESS) {
978       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
979       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
980       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
981       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     }
984     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
985     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
986     while (its--) {
987       /* go down through the rows */
988       for ( i=0; i<m; i++ ) {
989         n    = A->i[i+1] - A->i[i];
990 	PLogFlops(4*n+3);
991         idx  = A->j + A->i[i] + shift;
992         v    = A->a + A->i[i] + shift;
993         sum  = b[i];
994         SPARSEDENSEMDOT(sum,xs,v,idx,n);
995         d    = fshift + A->a[diag[i]+shift];
996         n    = B->i[i+1] - B->i[i];
997         idx  = B->j + B->i[i] + shift;
998         v    = B->a + B->i[i] + shift;
999         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1000         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1001       }
1002       /* come up through the rows */
1003       for ( i=m-1; i>-1; i-- ) {
1004         n    = A->i[i+1] - A->i[i];
1005 	PLogFlops(4*n+3)
1006         idx  = A->j + A->i[i] + shift;
1007         v    = A->a + A->i[i] + shift;
1008         sum  = b[i];
1009         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1010         d    = fshift + A->a[diag[i]+shift];
1011         n    = B->i[i+1] - B->i[i];
1012         idx  = B->j + B->i[i] + shift;
1013         v    = B->a + B->i[i] + shift;
1014         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1015         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1016       }
1017     }
1018   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1019     if (flag & SOR_ZERO_INITIAL_GUESS) {
1020       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1021       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1022       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1023       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1024       PetscFunctionReturn(0);
1025     }
1026     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1027     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1028     while (its--) {
1029       for ( i=0; i<m; i++ ) {
1030         n    = A->i[i+1] - A->i[i];
1031 	PLogFlops(4*n+3);
1032         idx  = A->j + A->i[i] + shift;
1033         v    = A->a + A->i[i] + shift;
1034         sum  = b[i];
1035         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1036         d    = fshift + A->a[diag[i]+shift];
1037         n    = B->i[i+1] - B->i[i];
1038         idx  = B->j + B->i[i] + shift;
1039         v    = B->a + B->i[i] + shift;
1040         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1041         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1042       }
1043     }
1044   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1045     if (flag & SOR_ZERO_INITIAL_GUESS) {
1046       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1047       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1048       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1049       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1050       PetscFunctionReturn(0);
1051     }
1052     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054     while (its--) {
1055       for ( i=m-1; i>-1; i-- ) {
1056         n    = A->i[i+1] - A->i[i];
1057 	PLogFlops(4*n+3);
1058         idx  = A->j + A->i[i] + shift;
1059         v    = A->a + A->i[i] + shift;
1060         sum  = b[i];
1061         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1062         d    = fshift + A->a[diag[i]+shift];
1063         n    = B->i[i+1] - B->i[i];
1064         idx  = B->j + B->i[i] + shift;
1065         v    = B->a + B->i[i] + shift;
1066         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1067         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1068       }
1069     }
1070   } else {
1071     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1072   }
1073   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1074   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1075   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNC__
1080 #define __FUNC__ "MatGetInfo_MPIAIJ"
1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1084   Mat        A = mat->A, B = mat->B;
1085   int        ierr;
1086   double     isend[5], irecv[5];
1087 
1088   PetscFunctionBegin;
1089   info->block_size     = 1.0;
1090   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1091   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1092   isend[3] = info->memory;  isend[4] = info->mallocs;
1093   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1094   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1095   isend[3] += info->memory;  isend[4] += info->mallocs;
1096   if (flag == MAT_LOCAL) {
1097     info->nz_used      = isend[0];
1098     info->nz_allocated = isend[1];
1099     info->nz_unneeded  = isend[2];
1100     info->memory       = isend[3];
1101     info->mallocs      = isend[4];
1102   } else if (flag == MAT_GLOBAL_MAX) {
1103     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1104     info->nz_used      = irecv[0];
1105     info->nz_allocated = irecv[1];
1106     info->nz_unneeded  = irecv[2];
1107     info->memory       = irecv[3];
1108     info->mallocs      = irecv[4];
1109   } else if (flag == MAT_GLOBAL_SUM) {
1110     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1111     info->nz_used      = irecv[0];
1112     info->nz_allocated = irecv[1];
1113     info->nz_unneeded  = irecv[2];
1114     info->memory       = irecv[3];
1115     info->mallocs      = irecv[4];
1116   }
1117   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1118   info->fill_ratio_needed = 0;
1119   info->factor_mallocs    = 0;
1120   info->rows_global       = (double)mat->M;
1121   info->columns_global    = (double)mat->N;
1122   info->rows_local        = (double)mat->m;
1123   info->columns_local     = (double)mat->N;
1124 
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNC__
1129 #define __FUNC__ "MatSetOption_MPIAIJ"
1130 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1131 {
1132   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1133 
1134   PetscFunctionBegin;
1135   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1136       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1137       op == MAT_COLUMNS_UNSORTED ||
1138       op == MAT_COLUMNS_SORTED ||
1139       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1140       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1141         MatSetOption(a->A,op);
1142         MatSetOption(a->B,op);
1143   } else if (op == MAT_ROW_ORIENTED) {
1144     a->roworiented = 1;
1145     MatSetOption(a->A,op);
1146     MatSetOption(a->B,op);
1147   } else if (op == MAT_ROWS_SORTED ||
1148              op == MAT_ROWS_UNSORTED ||
1149              op == MAT_SYMMETRIC ||
1150              op == MAT_STRUCTURALLY_SYMMETRIC ||
1151              op == MAT_YES_NEW_DIAGONALS)
1152     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1153   else if (op == MAT_COLUMN_ORIENTED) {
1154     a->roworiented = 0;
1155     MatSetOption(a->A,op);
1156     MatSetOption(a->B,op);
1157   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1158     a->donotstash = 1;
1159   } else if (op == MAT_NO_NEW_DIAGONALS){
1160     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1161   } else {
1162     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNC__
1168 #define __FUNC__ "MatGetSize_MPIAIJ"
1169 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1170 {
1171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1172 
1173   PetscFunctionBegin;
1174   if (m) *m = mat->M;
1175   if (n) *n = mat->N;
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNC__
1180 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1181 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1182 {
1183   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1184 
1185   PetscFunctionBegin;
1186   if (m) *m = mat->m;
1187   if (n) *n = mat->n;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNC__
1192 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1193 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1194 {
1195   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1196 
1197   PetscFunctionBegin;
1198   *m = mat->rstart; *n = mat->rend;
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1203 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatGetRow_MPIAIJ"
1207 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1208 {
1209   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1210   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1211   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1212   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1213   int        *cmap, *idx_p;
1214 
1215   PetscFunctionBegin;
1216   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1217   mat->getrowactive = PETSC_TRUE;
1218 
1219   if (!mat->rowvalues && (idx || v)) {
1220     /*
1221         allocate enough space to hold information from the longest row.
1222     */
1223     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1224     int     max = 1,m = mat->m,tmp;
1225     for ( i=0; i<m; i++ ) {
1226       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1227       if (max < tmp) { max = tmp; }
1228     }
1229     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1230     mat->rowindices = (int *) (mat->rowvalues + max);
1231   }
1232 
1233   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1234   lrow = row - rstart;
1235 
1236   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1237   if (!v)   {pvA = 0; pvB = 0;}
1238   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1239   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1240   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1241   nztot = nzA + nzB;
1242 
1243   cmap  = mat->garray;
1244   if (v  || idx) {
1245     if (nztot) {
1246       /* Sort by increasing column numbers, assuming A and B already sorted */
1247       int imark = -1;
1248       if (v) {
1249         *v = v_p = mat->rowvalues;
1250         for ( i=0; i<nzB; i++ ) {
1251           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1252           else break;
1253         }
1254         imark = i;
1255         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1256         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1257       }
1258       if (idx) {
1259         *idx = idx_p = mat->rowindices;
1260         if (imark > -1) {
1261           for ( i=0; i<imark; i++ ) {
1262             idx_p[i] = cmap[cworkB[i]];
1263           }
1264         } else {
1265           for ( i=0; i<nzB; i++ ) {
1266             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1267             else break;
1268           }
1269           imark = i;
1270         }
1271         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1272         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1273       }
1274     } else {
1275       if (idx) *idx = 0;
1276       if (v)   *v   = 0;
1277     }
1278   }
1279   *nz = nztot;
1280   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1281   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNC__
1286 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1287 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1288 {
1289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1290 
1291   PetscFunctionBegin;
1292   if (aij->getrowactive == PETSC_FALSE) {
1293     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1294   }
1295   aij->getrowactive = PETSC_FALSE;
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNC__
1300 #define __FUNC__ "MatNorm_MPIAIJ"
1301 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1302 {
1303   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1304   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1305   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1306   double     sum = 0.0;
1307   Scalar     *v;
1308 
1309   PetscFunctionBegin;
1310   if (aij->size == 1) {
1311     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1312   } else {
1313     if (type == NORM_FROBENIUS) {
1314       v = amat->a;
1315       for (i=0; i<amat->nz; i++ ) {
1316 #if defined(USE_PETSC_COMPLEX)
1317         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1318 #else
1319         sum += (*v)*(*v); v++;
1320 #endif
1321       }
1322       v = bmat->a;
1323       for (i=0; i<bmat->nz; i++ ) {
1324 #if defined(USE_PETSC_COMPLEX)
1325         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1326 #else
1327         sum += (*v)*(*v); v++;
1328 #endif
1329       }
1330       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1331       *norm = sqrt(*norm);
1332     } else if (type == NORM_1) { /* max column norm */
1333       double *tmp, *tmp2;
1334       int    *jj, *garray = aij->garray;
1335       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1336       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1337       PetscMemzero(tmp,aij->N*sizeof(double));
1338       *norm = 0.0;
1339       v = amat->a; jj = amat->j;
1340       for ( j=0; j<amat->nz; j++ ) {
1341         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1342       }
1343       v = bmat->a; jj = bmat->j;
1344       for ( j=0; j<bmat->nz; j++ ) {
1345         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1346       }
1347       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1348       for ( j=0; j<aij->N; j++ ) {
1349         if (tmp2[j] > *norm) *norm = tmp2[j];
1350       }
1351       PetscFree(tmp); PetscFree(tmp2);
1352     } else if (type == NORM_INFINITY) { /* max row norm */
1353       double ntemp = 0.0;
1354       for ( j=0; j<amat->m; j++ ) {
1355         v = amat->a + amat->i[j] + shift;
1356         sum = 0.0;
1357         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1358           sum += PetscAbsScalar(*v); v++;
1359         }
1360         v = bmat->a + bmat->i[j] + shift;
1361         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1362           sum += PetscAbsScalar(*v); v++;
1363         }
1364         if (sum > ntemp) ntemp = sum;
1365       }
1366       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1367     } else {
1368       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1369     }
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNC__
1375 #define __FUNC__ "MatTranspose_MPIAIJ"
1376 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1377 {
1378   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1379   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1380   int        ierr,shift = Aloc->indexshift;
1381   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1382   Mat        B;
1383   Scalar     *array;
1384 
1385   PetscFunctionBegin;
1386   if (matout == PETSC_NULL && M != N) {
1387     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1388   }
1389 
1390   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1391 
1392   /* copy over the A part */
1393   Aloc = (Mat_SeqAIJ*) a->A->data;
1394   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1395   row = a->rstart;
1396   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1397   for ( i=0; i<m; i++ ) {
1398     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1399     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1400   }
1401   aj = Aloc->j;
1402   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1403 
1404   /* copy over the B part */
1405   Aloc = (Mat_SeqAIJ*) a->B->data;
1406   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1407   row = a->rstart;
1408   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1409   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1410   for ( i=0; i<m; i++ ) {
1411     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1412     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1413   }
1414   PetscFree(ct);
1415   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1416   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1417   if (matout != PETSC_NULL) {
1418     *matout = B;
1419   } else {
1420     PetscOps       *Abops;
1421     struct _MatOps *Aops;
1422 
1423     /* This isn't really an in-place transpose .... but free data structures from a */
1424     PetscFree(a->rowners);
1425     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1426     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1427 #if defined (USE_CTABLE)
1428     if (a->colmap) TableDelete(a->colmap);
1429 #else
1430     if (a->colmap) PetscFree(a->colmap);
1431 #endif
1432     if (a->garray) PetscFree(a->garray);
1433     if (a->lvec) VecDestroy(a->lvec);
1434     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1435     PetscFree(a);
1436 
1437     /*
1438        This is horrible, horrible code. We need to keep the
1439       A pointers for the bops and ops but copy everything
1440       else from C.
1441     */
1442     Abops = A->bops;
1443     Aops  = A->ops;
1444     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1445     A->bops = Abops;
1446     A->ops  = Aops;
1447     PetscHeaderDestroy(B);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNC__
1453 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1454 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1455 {
1456   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1457   Mat a = aij->A, b = aij->B;
1458   int ierr,s1,s2,s3;
1459 
1460   PetscFunctionBegin;
1461   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1462   if (rr) {
1463     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1464     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1465     /* Overlap communication with computation. */
1466     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1467   }
1468   if (ll) {
1469     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1470     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1471     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1472   }
1473   /* scale  the diagonal block */
1474   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1475 
1476   if (rr) {
1477     /* Do a scatter end and then right scale the off-diagonal block */
1478     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1479     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1480   }
1481 
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 
1486 extern int MatPrintHelp_SeqAIJ(Mat);
1487 #undef __FUNC__
1488 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1489 int MatPrintHelp_MPIAIJ(Mat A)
1490 {
1491   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1492   int        ierr;
1493 
1494   PetscFunctionBegin;
1495   if (!a->rank) {
1496     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNC__
1502 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1503 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1504 {
1505   PetscFunctionBegin;
1506   *bs = 1;
1507   PetscFunctionReturn(0);
1508 }
1509 #undef __FUNC__
1510 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1511 int MatSetUnfactored_MPIAIJ(Mat A)
1512 {
1513   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1514   int        ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 #undef __FUNC__
1522 #define __FUNC__ "MatEqual_MPIAIJ"
1523 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1524 {
1525   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1526   Mat        a, b, c, d;
1527   PetscTruth flg;
1528   int        ierr;
1529 
1530   PetscFunctionBegin;
1531   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1532   a = matA->A; b = matA->B;
1533   c = matB->A; d = matB->B;
1534 
1535   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1536   if (flg == PETSC_TRUE) {
1537     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1538   }
1539   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNC__
1544 #define __FUNC__ "MatCopy_MPIAIJ"
1545 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1546 {
1547   int        ierr;
1548   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1549   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1550 
1551   PetscFunctionBegin;
1552   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1553     /* because of the column compression in the off-processor part of the matrix a->B,
1554        the number of columns in a->B and b->B may be different, hence we cannot call
1555        the MatCopy() directly on the two parts. If need be, we can provide a more
1556        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1557        then copying the submatrices */
1558     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1559   } else {
1560     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1561     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1567 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1568 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1569 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1570 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1571 
1572 /* -------------------------------------------------------------------*/
1573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1574        MatGetRow_MPIAIJ,
1575        MatRestoreRow_MPIAIJ,
1576        MatMult_MPIAIJ,
1577        MatMultAdd_MPIAIJ,
1578        MatMultTrans_MPIAIJ,
1579        MatMultTransAdd_MPIAIJ,
1580        0,
1581        0,
1582        0,
1583        0,
1584        0,
1585        0,
1586        MatRelax_MPIAIJ,
1587        MatTranspose_MPIAIJ,
1588        MatGetInfo_MPIAIJ,
1589        MatEqual_MPIAIJ,
1590        MatGetDiagonal_MPIAIJ,
1591        MatDiagonalScale_MPIAIJ,
1592        MatNorm_MPIAIJ,
1593        MatAssemblyBegin_MPIAIJ,
1594        MatAssemblyEnd_MPIAIJ,
1595        0,
1596        MatSetOption_MPIAIJ,
1597        MatZeroEntries_MPIAIJ,
1598        MatZeroRows_MPIAIJ,
1599        0,
1600        0,
1601        0,
1602        0,
1603        MatGetSize_MPIAIJ,
1604        MatGetLocalSize_MPIAIJ,
1605        MatGetOwnershipRange_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        0,
1610        MatDuplicate_MPIAIJ,
1611        0,
1612        0,
1613        0,
1614        0,
1615        0,
1616        MatGetSubMatrices_MPIAIJ,
1617        MatIncreaseOverlap_MPIAIJ,
1618        MatGetValues_MPIAIJ,
1619        MatCopy_MPIAIJ,
1620        MatPrintHelp_MPIAIJ,
1621        MatScale_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625        MatGetBlockSize_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630        MatFDColoringCreate_MPIAIJ,
1631        0,
1632        MatSetUnfactored_MPIAIJ,
1633        0,
1634        0,
1635        MatGetSubMatrix_MPIAIJ,
1636        0,
1637        0,
1638        MatGetMaps_Petsc};
1639 
1640 /* ----------------------------------------------------------------------------------------*/
1641 
1642 EXTERN_C_BEGIN
1643 #undef __FUNC__
1644 #define __FUNC__ "MatStoreValues_MPIAIJ"
1645 int MatStoreValues_MPIAIJ(Mat mat)
1646 {
1647   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1648   int        ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1652   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 EXTERN_C_END
1656 
1657 EXTERN_C_BEGIN
1658 #undef __FUNC__
1659 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1660 int MatRetrieveValues_MPIAIJ(Mat mat)
1661 {
1662   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1663   int        ierr;
1664 
1665   PetscFunctionBegin;
1666   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1667   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 EXTERN_C_END
1671 
1672 #include "pc.h"
1673 EXTERN_C_BEGIN
1674 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1675 EXTERN_C_END
1676 
1677 #undef __FUNC__
1678 #define __FUNC__ "MatCreateMPIAIJ"
1679 /*@C
1680    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1681    (the default parallel PETSc format).  For good matrix assembly performance
1682    the user should preallocate the matrix storage by setting the parameters
1683    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1684    performance can be increased by more than a factor of 50.
1685 
1686    Collective on MPI_Comm
1687 
1688    Input Parameters:
1689 +  comm - MPI communicator
1690 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1691            This value should be the same as the local size used in creating the
1692            y vector for the matrix-vector product y = Ax.
1693 .  n - This value should be the same as the local size used in creating the
1694        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1695        calculated if N is given) For square matrices n is almost always m.
1696 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1697 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1698 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1699            (same value is used for all local rows)
1700 .  d_nnz - array containing the number of nonzeros in the various rows of the
1701            DIAGONAL portion of the local submatrix (possibly different for each row)
1702            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1703            The size of this array is equal to the number of local rows, i.e 'm'.
1704            You must leave room for the diagonal entry even if it is zero.
1705 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1706            submatrix (same value is used for all local rows).
1707 -  o_nnz - array containing the number of nonzeros in the various rows of the
1708            OFF-DIAGONAL portion of the local submatrix (possibly different for
1709            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1710            structure. The size of this array is equal to the number
1711            of local rows, i.e 'm'.
1712 
1713    Output Parameter:
1714 .  A - the matrix
1715 
1716    Notes:
1717    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1718    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1719    storage requirements for this matrix.
1720 
1721    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1722    processor than it must be used on all processors that share the object for
1723    that argument.
1724 
1725    The AIJ format (also called the Yale sparse matrix format or
1726    compressed row storage), is fully compatible with standard Fortran 77
1727    storage.  That is, the stored row and column indices can begin at
1728    either one (as in Fortran) or zero.  See the users manual for details.
1729 
1730    The user MUST specify either the local or global matrix dimensions
1731    (possibly both).
1732 
1733    The parallel matrix is partitioned such that the first m0 rows belong to
1734    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1735    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1736 
1737    The DIAGONAL portion of the local submatrix of a processor can be defined
1738    as the submatrix which is obtained by extraction the part corresponding
1739    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1740    first row that belongs to the processor, and r2 is the last row belonging
1741    to the this processor. This is a square mxm matrix. The remaining portion
1742    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1743 
1744    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1745 
1746    By default, this format uses inodes (identical nodes) when possible.
1747    We search for consecutive rows with the same nonzero structure, thereby
1748    reusing matrix information to achieve increased efficiency.
1749 
1750    Options Database Keys:
1751 +  -mat_aij_no_inode  - Do not use inodes
1752 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1753 -  -mat_aij_oneindex - Internally use indexing starting at 1
1754         rather than 0.  Note that when calling MatSetValues(),
1755         the user still MUST index entries starting at 0!
1756 .   -mat_mpi - use the parallel matrix data structures even on one processor
1757                (defaults to using SeqBAIJ format on one processor)
1758 .   -mat_mpi - use the parallel matrix data structures even on one processor
1759                (defaults to using SeqAIJ format on one processor)
1760 
1761 
1762    Example usage:
1763 
1764    Consider the following 8x8 matrix with 34 non-zero values, that is
1765    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1766    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1767    as follows:
1768 
1769 .vb
1770             1  2  0  |  0  3  0  |  0  4
1771     Proc0   0  5  6  |  7  0  0  |  8  0
1772             9  0 10  | 11  0  0  | 12  0
1773     -------------------------------------
1774            13  0 14  | 15 16 17  |  0  0
1775     Proc1   0 18  0  | 19 20 21  |  0  0
1776             0  0  0  | 22 23  0  | 24  0
1777     -------------------------------------
1778     Proc2  25 26 27  |  0  0 28  | 29  0
1779            30  0  0  | 31 32 33  |  0 34
1780 .ve
1781 
1782    This can be represented as a collection of submatrices as:
1783 
1784 .vb
1785       A B C
1786       D E F
1787       G H I
1788 .ve
1789 
1790    Where the submatrices A,B,C are owned by proc0, D,E,F are
1791    owned by proc1, G,H,I are owned by proc2.
1792 
1793    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1794    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1795    The 'M','N' parameters are 8,8, and have the same values on all procs.
1796 
1797    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1798    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1799    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1800    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1801    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1802    matrix, ans [DF] as another SeqAIJ matrix.
1803 
1804    When d_nz, o_nz parameters are specified, d_nz storage elements are
1805    allocated for every row of the local diagonal submatrix, and o_nz
1806    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1807    One way to choose d_nz and o_nz is to use the max nonzerors per local
1808    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1809    In this case, the values of d_nz,o_nz are:
1810 .vb
1811      proc0 : dnz = 2, o_nz = 2
1812      proc1 : dnz = 3, o_nz = 2
1813      proc2 : dnz = 1, o_nz = 4
1814 .ve
1815    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1816    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1817    for proc3. i.e we are using 12+15+10=37 storage locations to store
1818    34 values.
1819 
1820    When d_nnz, o_nnz parameters are specified, the storage is specified
1821    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1822    In the above case the values for d_nnz,o_nnz are:
1823 .vb
1824      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1825      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1826      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1827 .ve
1828    Here the space allocated is sum of all the above values i.e 34, and
1829    hence pre-allocation is perfect.
1830 
1831    Level: intermediate
1832 
1833 .keywords: matrix, aij, compressed row, sparse, parallel
1834 
1835 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1836 @*/
1837 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1838 {
1839   Mat          B;
1840   Mat_MPIAIJ   *b;
1841   int          ierr, i,size,flag1 = 0, flag2 = 0;
1842 
1843   PetscFunctionBegin;
1844   MPI_Comm_size(comm,&size);
1845   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1846   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1847   if (!flag1 && !flag2 && size == 1) {
1848     if (M == PETSC_DECIDE) M = m;
1849     if (N == PETSC_DECIDE) N = n;
1850     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1851     PetscFunctionReturn(0);
1852   }
1853 
1854   *A = 0;
1855   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1856   PLogObjectCreate(B);
1857   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1858   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1859   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1860   B->ops->destroy    = MatDestroy_MPIAIJ;
1861   B->ops->view       = MatView_MPIAIJ;
1862   B->factor          = 0;
1863   B->assembled       = PETSC_FALSE;
1864   B->mapping         = 0;
1865 
1866   B->insertmode      = NOT_SET_VALUES;
1867   b->size            = size;
1868   MPI_Comm_rank(comm,&b->rank);
1869 
1870   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1871     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1872   }
1873 
1874   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1875   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1876   b->m = m; B->m = m;
1877   b->n = n; B->n = n;
1878   b->N = N; B->N = N;
1879   b->M = M; B->M = M;
1880 
1881   /* the information in the maps duplicates the information computed below, eventually
1882      we should remove the duplicate information that is not contained in the maps */
1883   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1884   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1885 
1886   /* build local table of row and column ownerships */
1887   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1888   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1889   b->cowners = b->rowners + b->size + 2;
1890   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1891   b->rowners[0] = 0;
1892   for ( i=2; i<=b->size; i++ ) {
1893     b->rowners[i] += b->rowners[i-1];
1894   }
1895   b->rstart = b->rowners[b->rank];
1896   b->rend   = b->rowners[b->rank+1];
1897   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1898   b->cowners[0] = 0;
1899   for ( i=2; i<=b->size; i++ ) {
1900     b->cowners[i] += b->cowners[i-1];
1901   }
1902   b->cstart = b->cowners[b->rank];
1903   b->cend   = b->cowners[b->rank+1];
1904 
1905   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1906   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1907   PLogObjectParent(B,b->A);
1908   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1909   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1910   PLogObjectParent(B,b->B);
1911 
1912   /* build cache for off array entries formed */
1913   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1914   b->donotstash  = 0;
1915   b->colmap      = 0;
1916   b->garray      = 0;
1917   b->roworiented = 1;
1918 
1919   /* stuff used for matrix vector multiply */
1920   b->lvec      = 0;
1921   b->Mvctx     = 0;
1922 
1923   /* stuff for MatGetRow() */
1924   b->rowindices   = 0;
1925   b->rowvalues    = 0;
1926   b->getrowactive = PETSC_FALSE;
1927 
1928   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1929                                      "MatStoreValues_MPIAIJ",
1930                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1931   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1932                                      "MatRetrieveValues_MPIAIJ",
1933                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1934   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1935                                      "MatGetDiagonalBlock_MPIAIJ",
1936                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1937   *A = B;
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNC__
1942 #define __FUNC__ "MatDuplicate_MPIAIJ"
1943 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1944 {
1945   Mat        mat;
1946   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1947   int        ierr, len=0, flg;
1948 
1949   PetscFunctionBegin;
1950   *newmat       = 0;
1951   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1952   PLogObjectCreate(mat);
1953   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1954   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1955   mat->ops->destroy    = MatDestroy_MPIAIJ;
1956   mat->ops->view       = MatView_MPIAIJ;
1957   mat->factor     = matin->factor;
1958   mat->assembled  = PETSC_TRUE;
1959 
1960   a->m = mat->m   = oldmat->m;
1961   a->n = mat->n   = oldmat->n;
1962   a->M = mat->M   = oldmat->M;
1963   a->N = mat->N   = oldmat->N;
1964 
1965   a->rstart       = oldmat->rstart;
1966   a->rend         = oldmat->rend;
1967   a->cstart       = oldmat->cstart;
1968   a->cend         = oldmat->cend;
1969   a->size         = oldmat->size;
1970   a->rank         = oldmat->rank;
1971   mat->insertmode = NOT_SET_VALUES;
1972   a->rowvalues    = 0;
1973   a->getrowactive = PETSC_FALSE;
1974 
1975   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1976   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1977   a->cowners = a->rowners + a->size + 2;
1978   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1979   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1980   if (oldmat->colmap) {
1981 #if defined (USE_CTABLE)
1982     ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr);
1983 #else
1984     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1985     PLogObjectMemory(mat,(a->N)*sizeof(int));
1986     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1987 #endif
1988   } else a->colmap = 0;
1989   if (oldmat->garray) {
1990     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1991     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1992     PLogObjectMemory(mat,len*sizeof(int));
1993     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1994   } else a->garray = 0;
1995 
1996   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1997   PLogObjectParent(mat,a->lvec);
1998   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1999   PLogObjectParent(mat,a->Mvctx);
2000   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2001   PLogObjectParent(mat,a->A);
2002   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2003   PLogObjectParent(mat,a->B);
2004   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2005   if (flg) {
2006     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2007   }
2008   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2009   *newmat = mat;
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #include "sys.h"
2014 
2015 #undef __FUNC__
2016 #define __FUNC__ "MatLoad_MPIAIJ"
2017 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
2018 {
2019   Mat          A;
2020   Scalar       *vals,*svals;
2021   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2022   MPI_Status   status;
2023   int          i, nz, ierr, j,rstart, rend, fd;
2024   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
2025   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
2026   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
2027 
2028   PetscFunctionBegin;
2029   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2030   if (!rank) {
2031     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2032     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2033     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2034     if (header[3] < 0) {
2035       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
2036     }
2037   }
2038 
2039   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2040   M = header[1]; N = header[2];
2041   /* determine ownership of all rows */
2042   m = M/size + ((M % size) > rank);
2043   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
2044   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2045   rowners[0] = 0;
2046   for ( i=2; i<=size; i++ ) {
2047     rowners[i] += rowners[i-1];
2048   }
2049   rstart = rowners[rank];
2050   rend   = rowners[rank+1];
2051 
2052   /* distribute row lengths to all processors */
2053   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
2054   offlens = ourlens + (rend-rstart);
2055   if (!rank) {
2056     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
2057     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2058     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2059     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2060     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2061     PetscFree(sndcounts);
2062   } else {
2063     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2064   }
2065 
2066   if (!rank) {
2067     /* calculate the number of nonzeros on each processor */
2068     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2069     PetscMemzero(procsnz,size*sizeof(int));
2070     for ( i=0; i<size; i++ ) {
2071       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2072         procsnz[i] += rowlengths[j];
2073       }
2074     }
2075     PetscFree(rowlengths);
2076 
2077     /* determine max buffer needed and allocate it */
2078     maxnz = 0;
2079     for ( i=0; i<size; i++ ) {
2080       maxnz = PetscMax(maxnz,procsnz[i]);
2081     }
2082     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2083 
2084     /* read in my part of the matrix column indices  */
2085     nz     = procsnz[0];
2086     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2087     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2088 
2089     /* read in every one elses and ship off */
2090     for ( i=1; i<size; i++ ) {
2091       nz   = procsnz[i];
2092       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2093       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2094     }
2095     PetscFree(cols);
2096   } else {
2097     /* determine buffer space needed for message */
2098     nz = 0;
2099     for ( i=0; i<m; i++ ) {
2100       nz += ourlens[i];
2101     }
2102     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2103 
2104     /* receive message of column indices*/
2105     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2106     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2107     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2108   }
2109 
2110   /* determine column ownership if matrix is not square */
2111   if (N != M) {
2112     n      = N/size + ((N % size) > rank);
2113     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2114     cstart = cend - n;
2115   } else {
2116     cstart = rstart;
2117     cend   = rend;
2118     n      = cend - cstart;
2119   }
2120 
2121   /* loop over local rows, determining number of off diagonal entries */
2122   PetscMemzero(offlens,m*sizeof(int));
2123   jj = 0;
2124   for ( i=0; i<m; i++ ) {
2125     for ( j=0; j<ourlens[i]; j++ ) {
2126       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2127       jj++;
2128     }
2129   }
2130 
2131   /* create our matrix */
2132   for ( i=0; i<m; i++ ) {
2133     ourlens[i] -= offlens[i];
2134   }
2135   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2136   A = *newmat;
2137   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
2138   for ( i=0; i<m; i++ ) {
2139     ourlens[i] += offlens[i];
2140   }
2141 
2142   if (!rank) {
2143     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2144 
2145     /* read in my part of the matrix numerical values  */
2146     nz   = procsnz[0];
2147     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2148 
2149     /* insert into matrix */
2150     jj      = rstart;
2151     smycols = mycols;
2152     svals   = vals;
2153     for ( i=0; i<m; i++ ) {
2154       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2155       smycols += ourlens[i];
2156       svals   += ourlens[i];
2157       jj++;
2158     }
2159 
2160     /* read in other processors and ship out */
2161     for ( i=1; i<size; i++ ) {
2162       nz   = procsnz[i];
2163       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2164       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2165     }
2166     PetscFree(procsnz);
2167   } else {
2168     /* receive numeric values */
2169     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2170 
2171     /* receive message of values*/
2172     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2173     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2174     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2175 
2176     /* insert into matrix */
2177     jj      = rstart;
2178     smycols = mycols;
2179     svals   = vals;
2180     for ( i=0; i<m; i++ ) {
2181       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2182       smycols += ourlens[i];
2183       svals   += ourlens[i];
2184       jj++;
2185     }
2186   }
2187   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2188 
2189   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2190   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNC__
2195 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2196 /*
2197     Not great since it makes two copies of the submatrix, first an SeqAIJ
2198   in local and then by concatenating the local matrices the end result.
2199   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2200 */
2201 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2202 {
2203   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2204   Mat        *local,M, Mreuse;
2205   Scalar     *vwork,*aa;
2206   MPI_Comm   comm = mat->comm;
2207   Mat_SeqAIJ *aij;
2208   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2209 
2210   PetscFunctionBegin;
2211   MPI_Comm_rank(comm,&rank);
2212   MPI_Comm_size(comm,&size);
2213 
2214   if (call ==  MAT_REUSE_MATRIX) {
2215     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2216     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2217     local = &Mreuse;
2218     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2219   } else {
2220     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2221     Mreuse = *local;
2222     PetscFree(local);
2223   }
2224 
2225   /*
2226       m - number of local rows
2227       n - number of columns (same on all processors)
2228       rstart - first row in new global matrix generated
2229   */
2230   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2231   if (call == MAT_INITIAL_MATRIX) {
2232     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2233     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2234     ii  = aij->i;
2235     jj  = aij->j;
2236 
2237     /*
2238         Determine the number of non-zeros in the diagonal and off-diagonal
2239         portions of the matrix in order to do correct preallocation
2240     */
2241 
2242     /* first get start and end of "diagonal" columns */
2243     if (csize == PETSC_DECIDE) {
2244       nlocal = n/size + ((n % size) > rank);
2245     } else {
2246       nlocal = csize;
2247     }
2248     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2249     rstart = rend - nlocal;
2250     if (rank == size - 1 && rend != n) {
2251       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2252     }
2253 
2254     /* next, compute all the lengths */
2255     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2256     olens = dlens + m;
2257     for ( i=0; i<m; i++ ) {
2258       jend = ii[i+1] - ii[i];
2259       olen = 0;
2260       dlen = 0;
2261       for ( j=0; j<jend; j++ ) {
2262         if ( *jj < rstart || *jj >= rend) olen++;
2263         else dlen++;
2264         jj++;
2265       }
2266       olens[i] = olen;
2267       dlens[i] = dlen;
2268     }
2269     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2270     PetscFree(dlens);
2271   } else {
2272     int ml,nl;
2273 
2274     M = *newmat;
2275     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2276     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2277     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2278     /*
2279          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2280        rather than the slower MatSetValues().
2281     */
2282     M->was_assembled = PETSC_TRUE;
2283     M->assembled     = PETSC_FALSE;
2284   }
2285   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2286   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2287   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2288   ii  = aij->i;
2289   jj  = aij->j;
2290   aa  = aij->a;
2291   for (i=0; i<m; i++) {
2292     row   = rstart + i;
2293     nz    = ii[i+1] - ii[i];
2294     cwork = jj;     jj += nz;
2295     vwork = aa;     aa += nz;
2296     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2297   }
2298 
2299   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2300   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2301   *newmat = M;
2302 
2303   /* save submatrix used in processor for next request */
2304   if (call ==  MAT_INITIAL_MATRIX) {
2305     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2306     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2307   }
2308 
2309   PetscFunctionReturn(0);
2310 }
2311