xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e24b481b4a15e258c09dabebcc19da3246e723e1)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.263 1998/10/13 15:13:47 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global number of columns of the matrix.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
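  /* colmap[g] holds (local off-diagonal column index)+1 for every global column g
     that appears in B, and 0 otherwise; storing index+1 lets a zero entry mean
     "column not present".  garray[] is the sorted list of global columns of B. */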
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
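/*
   The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for the
   diagonal (A) and off-diagonal (B) blocks.  For each entry they locate the column
   within the (sorted) row and add or insert the value; if the row has run out of
   preallocated space the whole block is reallocated with CHUNKSIZE extra entries for
   that row, and the later entries of the row are shifted up so it stays sorted by
   column.
*/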
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col) high = t; \
46       else             low  = t; \
47     } \
48       for ( _i=0; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col) high = t; \
117       else             low  = t; \
118     } \
119        for ( _i=0; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some Variables required in the macro */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
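  /* Each (row,column) pair is routed as follows: rows owned by this process go into
     the diagonal block A when the column lies in [cstart,cend) and into the
     off-diagonal block B otherwise (translating the global column through colmap if
     the matrix was assembled before); rows owned by other processes are stashed and
     communicated during MatAssemblyBegin/End unless donotstash is set. */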
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         } else {
282           if (!aij->colmap) {
283             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
284           }
285           col = aij->colmap[idxn[j]] - 1;
286           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
287           else {
288             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
289           }
290         }
291       }
292     } else {
293       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
294     }
295   }
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNC__
300 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
301 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
302 {
303   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
304   MPI_Comm    comm = mat->comm;
305   int         size = aij->size, *owners = aij->rowners;
306   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
307   MPI_Request *send_waits,*recv_waits;
308   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
309   InsertMode  addv;
310   Scalar      *rvalues,*svalues;
311 
312   PetscFunctionBegin;
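  /* Assembly begins by routing all stashed off-processor entries to their owners:
     count how many entries go to each process, use two MPI_Allreduce() calls to learn
     how many messages this process will receive and the longest possible message,
     post the (maximum-size) receives, then pack and send the stashed
     (row,column,value) triples with nonblocking sends. */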
313   if (aij->donotstash) {
314     aij->svalues    = 0; aij->rvalues    = 0;
315     aij->nsends     = 0; aij->nrecvs     = 0;
316     aij->send_waits = 0; aij->recv_waits = 0;
317     aij->rmax       = 0;
318     PetscFunctionReturn(0);
319   }
320 
321   /* make sure all processors are either in INSERTMODE or ADDMODE */
322   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
323   if (addv == (ADD_VALUES|INSERT_VALUES)) {
324     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
325   }
326   mat->insertmode = addv; /* in case this processor had no cache */
327 
328   /*  first count number of contributors to each processor */
329   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
330   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
331   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
332   for ( i=0; i<aij->stash.n; i++ ) {
333     idx = aij->stash.idx[i];
334     for ( j=0; j<size; j++ ) {
335       if (idx >= owners[j] && idx < owners[j+1]) {
336         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
337       }
338     }
339   }
340   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
341 
342   /* inform other processors of number of messages and max length*/
343   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
344   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
345   nreceives = work[rank];
346   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
347   nmax = work[rank];
348   PetscFree(work);
349 
350   /* post receives:
351        1) each message will consist of ordered triples
352      (global row index, global column index, value); we store the indices as
353      Scalars (doubles) to simplify the message passing.
354        2) since we don't know how long each individual message is we
355      allocate the largest needed buffer for each receive. Potentially
356      this is a lot of wasted space.
357 
358 
359        This could be done better.
360   */
361   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
362   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
363   for ( i=0; i<nreceives; i++ ) {
364     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
365               comm,recv_waits+i);CHKERRQ(ierr);
366   }
367 
368   /* do sends:
369       1) starts[i] gives the starting index in svalues for stuff going to
370          the ith processor
371   */
372   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
373   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
374   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
375   starts[0]  = 0;
376   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
377   for ( i=0; i<aij->stash.n; i++ ) {
378     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
379     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
380     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
381   }
382   PetscFree(owner);
383   starts[0] = 0;
384   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
385   count = 0;
386   for ( i=0; i<size; i++ ) {
387     if (procs[i]) {
388       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
389     }
390   }
391   PetscFree(starts);
392   PetscFree(nprocs);
393 
394   /* Free cache space */
395   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
396   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
397 
398   aij->svalues    = svalues;    aij->rvalues    = rvalues;
399   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
400   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
401   aij->rmax       = nmax;
402 
403   PetscFunctionReturn(0);
404 }
405 extern int MatSetUpMultiply_MPIAIJ(Mat);
406 
407 #undef __FUNC__
408 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
409 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
410 {
411   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
412   MPI_Status  *send_status,recv_status;
413   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
414   int         row,col,other_disassembled;
415   Scalar      *values,val;
416   InsertMode  addv = mat->insertmode;
417 
418   PetscFunctionBegin;
419 
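  /* Drain the receives posted in MatAssemblyBegin(): each message is a sequence of
     (row,column,value) triples stored as Scalars.  Entries whose column falls inside
     [cstart,cend) go into the diagonal block A; all others go into the off-diagonal
     block B, disassembling first if a brand new off-diagonal column appears in an
     already assembled matrix. */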
420   /*  wait on receives */
421   while (count) {
422     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
423     /* unpack receives into our local space */
424     values = aij->rvalues + 3*imdex*aij->rmax;
425     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
426     n = n/3;
427     for ( i=0; i<n; i++ ) {
428       row = (int) PetscReal(values[3*i]) - aij->rstart;
429       col = (int) PetscReal(values[3*i+1]);
430       val = values[3*i+2];
431       if (col >= aij->cstart && col < aij->cend) {
432         col -= aij->cstart;
433         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
434       } else {
435         if (mat->was_assembled || mat->assembled) {
436           if (!aij->colmap) {
437             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
438           }
439           col = aij->colmap[col] - 1;
440           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
441             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
442             col = (int) PetscReal(values[3*i+1]);
443           }
444         }
445         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
446       }
447     }
448     count--;
449   }
450   if (aij->recv_waits) PetscFree(aij->recv_waits);
451   if (aij->rvalues)    PetscFree(aij->rvalues);
452 
453   /* wait on sends */
454   if (aij->nsends) {
455     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
456     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
457     PetscFree(send_status);
458   }
459   if (aij->send_waits) PetscFree(aij->send_waits);
460   if (aij->svalues)    PetscFree(aij->svalues);
461 
462   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
463   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
464 
465   /* determine if any processor has disassembled, if so we must
466      also disassemble ourselves, in order that we may reassemble. */
467   /*
468      if nonzero structure of submatrix B cannot change then we know that
469      no processor disassembled thus we can skip this stuff
470   */
471   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
472     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
473     if (mat->was_assembled && !other_disassembled) {
474       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
475     }
476   }
477 
478   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
479     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
480   }
481   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
482   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
483 
484   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNC__
489 #define __FUNC__ "MatZeroEntries_MPIAIJ"
490 int MatZeroEntries_MPIAIJ(Mat A)
491 {
492   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
493   int        ierr;
494 
495   PetscFunctionBegin;
496   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
497   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNC__
502 #define __FUNC__ "MatZeroRows_MPIAIJ"
503 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
504 {
505   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
506   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
507   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
508   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
509   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
510   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
511   MPI_Comm       comm = A->comm;
512   MPI_Request    *send_waits,*recv_waits;
513   MPI_Status     recv_status,*send_status;
514   IS             istmp;
515   PetscTruth     localdiag;
516 
517   PetscFunctionBegin;
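  /* The rows listed in 'is' may be owned by any process, so they are first routed to
     their owners with the same count/allreduce/Irecv/Isend pattern used in
     MatAssemblyBegin(); every process then zeros the rows it received in both its
     diagonal and off-diagonal blocks. */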
518   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
519   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
520 
521   /*  first count number of contributors to each processor */
522   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
523   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
524   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
525   for ( i=0; i<N; i++ ) {
526     idx = rows[i];
527     found = 0;
528     for ( j=0; j<size; j++ ) {
529       if (idx >= owners[j] && idx < owners[j+1]) {
530         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
531       }
532     }
533     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
534   }
535   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
536 
537   /* inform other processors of number of messages and max length*/
538   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
539   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
540   nrecvs = work[rank];
541   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
542   nmax = work[rank];
543   PetscFree(work);
544 
545   /* post receives:   */
546   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
547   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
548   for ( i=0; i<nrecvs; i++ ) {
549     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
550   }
551 
552   /* do sends:
553       1) starts[i] gives the starting index in svalues for stuff going to
554          the ith processor
555   */
556   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
557   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
558   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
559   starts[0] = 0;
560   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
561   for ( i=0; i<N; i++ ) {
562     svalues[starts[owner[i]]++] = rows[i];
563   }
564   ISRestoreIndices(is,&rows);
565 
566   starts[0] = 0;
567   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
568   count = 0;
569   for ( i=0; i<size; i++ ) {
570     if (procs[i]) {
571       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
572     }
573   }
574   PetscFree(starts);
575 
576   base = owners[rank];
577 
578   /*  wait on receives */
579   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
580   source = lens + nrecvs;
581   count  = nrecvs; slen = 0;
582   while (count) {
583     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
584     /* unpack receives into our local space */
585     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
586     source[imdex]  = recv_status.MPI_SOURCE;
587     lens[imdex]  = n;
588     slen += n;
589     count--;
590   }
591   PetscFree(recv_waits);
592 
593   /* move the data into the send scatter */
594   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
595   count = 0;
596   for ( i=0; i<nrecvs; i++ ) {
597     values = rvalues + i*nmax;
598     for ( j=0; j<lens[i]; j++ ) {
599       lrows[count++] = values[j] - base;
600     }
601   }
602   PetscFree(rvalues); PetscFree(lens);
603   PetscFree(owner); PetscFree(nprocs);
604 
605   /* actually zap the local rows */
606   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
607   PLogObjectParent(A,istmp);
608 
609   /*
610         Zero the required rows. If the "diagonal block" of the matrix
611      is square and the user wishes to set the diagonal we use separate
612      code so that MatSetValues() is not called for each diagonal entry, which
613      would allocate new memory, causing many mallocs and slowing things down.
614 
615        Contributed by: Mathew Knepley
616   */
617   localdiag = PETSC_FALSE;
618   if (diag && (l->A->M == l->A->N)) {
619     localdiag = PETSC_TRUE;
620     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
621   } else {
622     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
623   }
624   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
625   ierr = ISDestroy(istmp); CHKERRQ(ierr);
626 
627   if (diag && (localdiag == PETSC_FALSE)) {
628     for ( i = 0; i < slen; i++ ) {
629       row = lrows[i] + rstart;
630       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
631     }
632     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
633     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
634   }
635 
644   PetscFree(lrows);
645 
646   /* wait on sends */
647   if (nsends) {
648     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
649     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
650     PetscFree(send_status);
651   }
652   PetscFree(send_waits); PetscFree(svalues);
653 
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatMult_MPIAIJ"
659 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
660 {
661   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
662   int        ierr,nt;
663 
664   PetscFunctionBegin;
665   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
666   if (nt != a->n) {
667     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
668   }
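  /* y = A*x_local + B*x_ghost: start gathering the off-processor entries of xx into
     lvec, multiply by the diagonal block while the messages are in flight, then
     finish the scatter and add in the off-diagonal contribution. */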
669   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
670   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
671   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
672   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNC__
677 #define __FUNC__ "MatMultAdd_MPIAIJ"
678 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
679 {
680   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
681   int        ierr;
682 
683   PetscFunctionBegin;
684   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
685   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
686   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
687   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNC__
692 #define __FUNC__ "MatMultTrans_MPIAIJ"
693 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
694 {
695   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696   int        ierr;
697 
698   PetscFunctionBegin;
699   /* do nondiagonal part */
700   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
701   /* send it on its way */
702   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
703   /* do local part */
704   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
705   /* receive remote parts: note this assumes the values are not actually */
706   /* inserted in yy until the next line, which is true for my implementation*/
707   /* but perhaps is not always true. */
708   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
714 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
715 {
716   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
717   int        ierr;
718 
719   PetscFunctionBegin;
720   /* do nondiagonal part */
721   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
722   /* send it on its way */
723   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
724   /* do local part */
725   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
726   /* receive remote parts: note this assumes the values are not actually */
727   /* inserted in zz until the next line, which is true for my implementation*/
728   /* but perhaps is not always true. */
729   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 /*
734   This only works correctly for square matrices where the subblock A->A is the
735    diagonal block
736 */
737 #undef __FUNC__
738 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
739 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
740 {
741   int        ierr;
742   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
743 
744   PetscFunctionBegin;
745   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
746   if (a->rstart != a->cstart || a->rend != a->cend) {
747     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
748   }
749   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNC__
754 #define __FUNC__ "MatScale_MPIAIJ"
755 int MatScale_MPIAIJ(Scalar *aa,Mat A)
756 {
757   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
758   int        ierr;
759 
760   PetscFunctionBegin;
761   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
762   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ "MatDestroy_MPIAIJ"
768 int MatDestroy_MPIAIJ(Mat mat)
769 {
770   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
771   int        ierr;
772 
773   PetscFunctionBegin;
774   if (--mat->refct > 0) PetscFunctionReturn(0);
775 
776   if (mat->mapping) {
777     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
778   }
779   if (mat->bmapping) {
780     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
781   }
782   if (mat->rmap) {
783     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
784   }
785   if (mat->cmap) {
786     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
787   }
788 #if defined(USE_PETSC_LOG)
789   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
790 #endif
791   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
792   PetscFree(aij->rowners);
793   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
794   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
795   if (aij->colmap) PetscFree(aij->colmap);
796   if (aij->garray) PetscFree(aij->garray);
797   if (aij->lvec)   VecDestroy(aij->lvec);
798   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
799   if (aij->rowvalues) PetscFree(aij->rowvalues);
800   PetscFree(aij);
801   PLogObjectDestroy(mat);
802   PetscHeaderDestroy(mat);
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNC__
807 #define __FUNC__ "MatView_MPIAIJ_Binary"
808 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
809 {
810   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
811   int         ierr;
812 
813   PetscFunctionBegin;
814   if (aij->size == 1) {
815     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
816   }
817   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNC__
822 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
823 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
824 {
825   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
826   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
827   int         ierr, format,shift = C->indexshift,rank;
828   FILE        *fd;
829   ViewerType  vtype;
830 
831   PetscFunctionBegin;
832   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
833   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
834     ierr = ViewerGetFormat(viewer,&format);
835     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
836       MatInfo info;
837       int     flg;
838       MPI_Comm_rank(mat->comm,&rank);
839       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
840       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
841       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
842       PetscSequentialPhaseBegin(mat->comm,1);
843       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
844          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
845       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
846          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
847       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
848       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
849       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
850       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
851       fflush(fd);
852       PetscSequentialPhaseEnd(mat->comm,1);
853       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
854       PetscFunctionReturn(0);
855     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
856       PetscFunctionReturn(0);
857     }
858   }
859 
860   if (vtype == DRAW_VIEWER) {
861     Draw       draw;
862     PetscTruth isnull;
863     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
864     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
865   }
866 
867   if (vtype == ASCII_FILE_VIEWER) {
868     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
869     PetscSequentialPhaseBegin(mat->comm,1);
870     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
871            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
872            aij->cend);
873     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
874     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
875     fflush(fd);
876     PetscSequentialPhaseEnd(mat->comm,1);
877   } else {
878     int size = aij->size;
879     rank = aij->rank;
880     if (size == 1) {
881       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
882     } else {
883       /* assemble the entire matrix onto first processor. */
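      /* Rank 0 creates an M x N copy owning every row; all other ranks contribute
         zero local rows.  Each process then inserts its local A rows (columns shifted
         to global numbering by cstart) and its local B rows (columns mapped through
         garray), and the assembled copy is viewed on rank 0 (or on every rank for a
         Draw viewer, since the draw calls are collective). */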
884       Mat         A;
885       Mat_SeqAIJ *Aloc;
886       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
887       Scalar      *a;
888 
889       if (!rank) {
890         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
891       } else {
892         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
893       }
894       PLogObjectParent(mat,A);
895 
896       /* copy over the A part */
897       Aloc = (Mat_SeqAIJ*) aij->A->data;
898       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
899       row = aij->rstart;
900       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
901       for ( i=0; i<m; i++ ) {
902         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
903         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
904       }
905       aj = Aloc->j;
906       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
907 
908       /* copy over the B part */
909       Aloc = (Mat_SeqAIJ*) aij->B->data;
910       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
911       row = aij->rstart;
912       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
913       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
914       for ( i=0; i<m; i++ ) {
915         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
916         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
917       }
918       PetscFree(ct);
919       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
920       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
921       /*
922          Everyone has to call to draw the matrix since the graphics waits are
923          synchronized across all processors that share the Draw object
924       */
925       if (!rank || vtype == DRAW_VIEWER) {
926         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
927       }
928       ierr = MatDestroy(A); CHKERRQ(ierr);
929     }
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNC__
935 #define __FUNC__ "MatView_MPIAIJ"
936 int MatView_MPIAIJ(Mat mat,Viewer viewer)
937 {
938   int         ierr;
939   ViewerType  vtype;
940 
941   PetscFunctionBegin;
942   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
943   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
944       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
945     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
946   } else if (vtype == BINARY_FILE_VIEWER) {
947     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
948   } else {
949     SETERRQ(1,1,"Viewer type not supported by PETSc object");
950   }
951   PetscFunctionReturn(0);
952 }
953 
954 /*
955     This has to provide several versions.
956 
957      2) a) use only local smoothing updating outer values only once.
958         b) local smoothing updating outer values each inner iteration
959      3) color updating outer values between colors.
960 */
961 #undef __FUNC__
962 #define __FUNC__ "MatRelax_MPIAIJ"
963 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
964                            double fshift,int its,Vec xx)
965 {
966   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
967   Mat        AA = mat->A, BB = mat->B;
968   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
969   Scalar     *b,*x,*xs,*ls,d,*v,sum;
970   int        ierr,*idx, *diag;
971   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
972 
973   PetscFunctionBegin;
974   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
975   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
976   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
977   xs = x + shift; /* shift by one for index start of 1 */
978   ls = ls + shift;
979   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
980   diag = A->diag;
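  /* Local SOR: off-processor values of xx are gathered once into ls (mat->lvec) and
     held fixed for the sweep(s); each row update combines the diagonal-block part of
     the row against x with the off-diagonal part against ls, adds fshift to the
     diagonal d, and relaxes x[i] with factor omega. */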
981   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
982     if (flag & SOR_ZERO_INITIAL_GUESS) {
983       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
984       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
985       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
986       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
987       PetscFunctionReturn(0);
988     }
989     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
990     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
991     while (its--) {
992       /* go down through the rows */
993       for ( i=0; i<m; i++ ) {
994         n    = A->i[i+1] - A->i[i];
995 	PLogFlops(4*n+3);
996         idx  = A->j + A->i[i] + shift;
997         v    = A->a + A->i[i] + shift;
998         sum  = b[i];
999         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1000         d    = fshift + A->a[diag[i]+shift];
1001         n    = B->i[i+1] - B->i[i];
1002         idx  = B->j + B->i[i] + shift;
1003         v    = B->a + B->i[i] + shift;
1004         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1005         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1006       }
1007       /* come up through the rows */
1008       for ( i=m-1; i>-1; i-- ) {
1009         n    = A->i[i+1] - A->i[i];
1010 	PLogFlops(4*n+3);
1011         idx  = A->j + A->i[i] + shift;
1012         v    = A->a + A->i[i] + shift;
1013         sum  = b[i];
1014         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1015         d    = fshift + A->a[diag[i]+shift];
1016         n    = B->i[i+1] - B->i[i];
1017         idx  = B->j + B->i[i] + shift;
1018         v    = B->a + B->i[i] + shift;
1019         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1020         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1021       }
1022     }
1023   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024     if (flag & SOR_ZERO_INITIAL_GUESS) {
1025       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1026       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1027       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1028       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1029       PetscFunctionReturn(0);
1030     }
1031     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1033     while (its--) {
1034       for ( i=0; i<m; i++ ) {
1035         n    = A->i[i+1] - A->i[i];
1036 	PLogFlops(4*n+3);
1037         idx  = A->j + A->i[i] + shift;
1038         v    = A->a + A->i[i] + shift;
1039         sum  = b[i];
1040         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1041         d    = fshift + A->a[diag[i]+shift];
1042         n    = B->i[i+1] - B->i[i];
1043         idx  = B->j + B->i[i] + shift;
1044         v    = B->a + B->i[i] + shift;
1045         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1046         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1047       }
1048     }
1049   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1050     if (flag & SOR_ZERO_INITIAL_GUESS) {
1051       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1052       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1053       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1054       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1055       PetscFunctionReturn(0);
1056     }
1057     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1058     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059     while (its--) {
1060       for ( i=m-1; i>-1; i-- ) {
1061         n    = A->i[i+1] - A->i[i];
1062 	PLogFlops(4*n+3);
1063         idx  = A->j + A->i[i] + shift;
1064         v    = A->a + A->i[i] + shift;
1065         sum  = b[i];
1066         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1067         d    = fshift + A->a[diag[i]+shift];
1068         n    = B->i[i+1] - B->i[i];
1069         idx  = B->j + B->i[i] + shift;
1070         v    = B->a + B->i[i] + shift;
1071         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1072         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1073       }
1074     }
1075   } else {
1076     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1077   }
1078   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1079   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1080   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNC__
1085 #define __FUNC__ "MatGetInfo_MPIAIJ"
1086 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1087 {
1088   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1089   Mat        A = mat->A, B = mat->B;
1090   int        ierr;
1091   double     isend[5], irecv[5];
1092 
1093   PetscFunctionBegin;
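  /* Local info is the sum of the A (diagonal) and B (off-diagonal) block statistics;
     MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce those five numbers across the
     communicator with MPI_MAX and MPI_SUM respectively. */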
1094   info->block_size     = 1.0;
1095   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1096   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1097   isend[3] = info->memory;  isend[4] = info->mallocs;
1098   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1099   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1100   isend[3] += info->memory;  isend[4] += info->mallocs;
1101   if (flag == MAT_LOCAL) {
1102     info->nz_used      = isend[0];
1103     info->nz_allocated = isend[1];
1104     info->nz_unneeded  = isend[2];
1105     info->memory       = isend[3];
1106     info->mallocs      = isend[4];
1107   } else if (flag == MAT_GLOBAL_MAX) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else if (flag == MAT_GLOBAL_SUM) {
1115     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1116     info->nz_used      = irecv[0];
1117     info->nz_allocated = irecv[1];
1118     info->nz_unneeded  = irecv[2];
1119     info->memory       = irecv[3];
1120     info->mallocs      = irecv[4];
1121   }
1122   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1123   info->fill_ratio_needed = 0;
1124   info->factor_mallocs    = 0;
1125   info->rows_global       = (double)mat->M;
1126   info->columns_global    = (double)mat->N;
1127   info->rows_local        = (double)mat->m;
1128   info->columns_local     = (double)mat->N;
1129 
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNC__
1134 #define __FUNC__ "MatSetOption_MPIAIJ"
1135 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1136 {
1137   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1138 
1139   PetscFunctionBegin;
1140   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1141       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1142       op == MAT_COLUMNS_UNSORTED ||
1143       op == MAT_COLUMNS_SORTED ||
1144       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1145       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1146         MatSetOption(a->A,op);
1147         MatSetOption(a->B,op);
1148   } else if (op == MAT_ROW_ORIENTED) {
1149     a->roworiented = 1;
1150     MatSetOption(a->A,op);
1151     MatSetOption(a->B,op);
1152   } else if (op == MAT_ROWS_SORTED ||
1153              op == MAT_ROWS_UNSORTED ||
1154              op == MAT_SYMMETRIC ||
1155              op == MAT_STRUCTURALLY_SYMMETRIC ||
1156              op == MAT_YES_NEW_DIAGONALS)
1157     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1158   else if (op == MAT_COLUMN_ORIENTED) {
1159     a->roworiented = 0;
1160     MatSetOption(a->A,op);
1161     MatSetOption(a->B,op);
1162   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1163     a->donotstash = 1;
1164   } else if (op == MAT_NO_NEW_DIAGONALS){
1165     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1166   } else {
1167     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ "MatGetSize_MPIAIJ"
1174 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1175 {
1176   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1177 
1178   PetscFunctionBegin;
1179   if (m) *m = mat->M;
1180   if (n) *n = mat->N;
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNC__
1185 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1186 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1187 {
1188   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1189 
1190   PetscFunctionBegin;
1191   if (m) *m = mat->m;
1192   if (n) *n = mat->n;
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNC__
1197 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1198 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1199 {
1200   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1201 
1202   PetscFunctionBegin;
1203   *m = mat->rstart; *n = mat->rend;
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1208 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatGetRow_MPIAIJ"
1212 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1213 {
1214   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1215   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1216   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1217   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1218   int        *cmap, *idx_p;
1219 
1220   PetscFunctionBegin;
1221   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1222   mat->getrowactive = PETSC_TRUE;
1223 
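  /* A global row is returned by merging the diagonal-block entries (shifted by cstart
     to global column numbering) with the off-diagonal entries (mapped through
     garray), preserving increasing global column order; rowvalues/rowindices provide
     workspace sized for the longest local row. */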
1224   if (!mat->rowvalues && (idx || v)) {
1225     /*
1226         allocate enough space to hold information from the longest row.
1227     */
1228     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1229     int     max = 1,m = mat->m,tmp;
1230     for ( i=0; i<m; i++ ) {
1231       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1232       if (max < tmp) { max = tmp; }
1233     }
1234     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1235     mat->rowindices = (int *) (mat->rowvalues + max);
1236   }
1237 
1238   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1239   lrow = row - rstart;
1240 
1241   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1242   if (!v)   {pvA = 0; pvB = 0;}
1243   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1244   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1245   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1246   nztot = nzA + nzB;
1247 
1248   cmap  = mat->garray;
1249   if (v  || idx) {
1250     if (nztot) {
1251       /* Sort by increasing column numbers, assuming A and B already sorted */
1252       int imark = -1;
1253       if (v) {
1254         *v = v_p = mat->rowvalues;
1255         for ( i=0; i<nzB; i++ ) {
1256           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1257           else break;
1258         }
1259         imark = i;
1260         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1261         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1262       }
1263       if (idx) {
1264         *idx = idx_p = mat->rowindices;
1265         if (imark > -1) {
1266           for ( i=0; i<imark; i++ ) {
1267             idx_p[i] = cmap[cworkB[i]];
1268           }
1269         } else {
1270           for ( i=0; i<nzB; i++ ) {
1271             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1272             else break;
1273           }
1274           imark = i;
1275         }
1276         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1277         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1278       }
1279     } else {
1280       if (idx) *idx = 0;
1281       if (v)   *v   = 0;
1282     }
1283   }
1284   *nz = nztot;
1285   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1286   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNC__
1291 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1292 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1293 {
1294   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1295 
1296   PetscFunctionBegin;
1297   if (aij->getrowactive == PETSC_FALSE) {
1298     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1299   }
1300   aij->getrowactive = PETSC_FALSE;
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNC__
1305 #define __FUNC__ "MatNorm_MPIAIJ"
1306 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1307 {
1308   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1309   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1310   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1311   double     sum = 0.0;
1312   Scalar     *v;
1313 
1314   PetscFunctionBegin;
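  /* Frobenius norm: sum the squares of all local nonzeros (both blocks) and
     allreduce.  1-norm: accumulate per-global-column absolute sums in a length-N
     work array (not scalable in the number of columns), allreduce, and take the
     maximum.  Infinity norm: take the maximum local row sum and allreduce with
     MPI_MAX. */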
1315   if (aij->size == 1) {
1316     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1317   } else {
1318     if (type == NORM_FROBENIUS) {
1319       v = amat->a;
1320       for (i=0; i<amat->nz; i++ ) {
1321 #if defined(USE_PETSC_COMPLEX)
1322         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1323 #else
1324         sum += (*v)*(*v); v++;
1325 #endif
1326       }
1327       v = bmat->a;
1328       for (i=0; i<bmat->nz; i++ ) {
1329 #if defined(USE_PETSC_COMPLEX)
1330         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1331 #else
1332         sum += (*v)*(*v); v++;
1333 #endif
1334       }
1335       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1336       *norm = sqrt(*norm);
1337     } else if (type == NORM_1) { /* max column norm */
1338       double *tmp, *tmp2;
1339       int    *jj, *garray = aij->garray;
1340       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1341       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1342       PetscMemzero(tmp,aij->N*sizeof(double));
1343       *norm = 0.0;
1344       v = amat->a; jj = amat->j;
1345       for ( j=0; j<amat->nz; j++ ) {
1346         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1347       }
1348       v = bmat->a; jj = bmat->j;
1349       for ( j=0; j<bmat->nz; j++ ) {
1350         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1351       }
1352       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1353       for ( j=0; j<aij->N; j++ ) {
1354         if (tmp2[j] > *norm) *norm = tmp2[j];
1355       }
1356       PetscFree(tmp); PetscFree(tmp2);
1357     } else if (type == NORM_INFINITY) { /* max row norm */
1358       double ntemp = 0.0;
1359       for ( j=0; j<amat->m; j++ ) {
1360         v = amat->a + amat->i[j] + shift;
1361         sum = 0.0;
1362         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1363           sum += PetscAbsScalar(*v); v++;
1364         }
1365         v = bmat->a + bmat->i[j] + shift;
1366         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1367           sum += PetscAbsScalar(*v); v++;
1368         }
1369         if (sum > ntemp) ntemp = sum;
1370       }
1371       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1372     } else {
1373       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1374     }
1375   }
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNC__
1380 #define __FUNC__ "MatTranspose_MPIAIJ"
1381 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1382 {
1383   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1384   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1385   int        ierr,shift = Aloc->indexshift;
1386   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1387   Mat        B;
1388   Scalar     *array;
1389 
1390   PetscFunctionBegin;
1391   if (matout == PETSC_NULL && M != N) {
1392     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1393   }
1394 
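  /* The transpose is formed explicitly: create an n x m (globally N x M) matrix and,
     for each local row, insert the diagonal-block entries (columns shifted to global
     numbering) and the off-diagonal entries (columns mapped through garray) as a
     column of the new matrix, then assemble. */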
1395   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1396 
1397   /* copy over the A part */
1398   Aloc = (Mat_SeqAIJ*) a->A->data;
1399   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1400   row = a->rstart;
1401   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1402   for ( i=0; i<m; i++ ) {
1403     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1404     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1405   }
1406   aj = Aloc->j;
1407   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1408 
1409   /* copy over the B part */
1410   Aloc = (Mat_SeqAIJ*) a->B->data;
1411   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1412   row = a->rstart;
1413   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1414   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1415   for ( i=0; i<m; i++ ) {
1416     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1417     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1418   }
1419   PetscFree(ct);
1420   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1421   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1422   if (matout != PETSC_NULL) {
1423     *matout = B;
1424   } else {
1425     PetscOps       *Abops;
1426     struct _MatOps *Aops;
1427 
1428     /* This isn't really an in-place transpose, but we free the data structures from a */
1429     PetscFree(a->rowners);
1430     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1431     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1432     if (a->colmap) PetscFree(a->colmap);
1433     if (a->garray) PetscFree(a->garray);
1434     if (a->lvec) VecDestroy(a->lvec);
1435     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1436     PetscFree(a);
1437 
1438     /*
1439        This is horrible, horrible code. We need to keep the
1440       A pointers for the bops and ops but copy everything
1441       else from B.
1442     */
1443     Abops = A->bops;
1444     Aops  = A->ops;
1445     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1446     A->bops = Abops;
1447     A->ops  = Aops;
1448     PetscHeaderDestroy(B);
1449   }
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNC__
1454 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1455 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1456 {
1457   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1458   Mat a = aij->A, b = aij->B;
1459   int ierr,s1,s2,s3;
1460 
1461   PetscFunctionBegin;
1462   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1463   if (rr) {
1464     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1465     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1466     /* Overlap communication with computation. */
1467     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1468   }
1469   if (ll) {
1470     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1471     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1472     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1473   }
1474   /* scale the diagonal block */
1475   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1476 
1477   if (rr) {
1478     /* Do a scatter end and then right scale the off-diagonal block */
1479     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1480     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1481   }
1482 
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 
1487 extern int MatPrintHelp_SeqAIJ(Mat);
1488 #undef __FUNC__
1489 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1490 int MatPrintHelp_MPIAIJ(Mat A)
1491 {
1492   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1493   int        ierr;
1494 
1495   PetscFunctionBegin;
1496   if (!a->rank) {
1497     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNC__
1503 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1504 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1505 {
1506   PetscFunctionBegin;
1507   *bs = 1;
1508   PetscFunctionReturn(0);
1509 }
1510 #undef __FUNC__
1511 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1512 int MatSetUnfactored_MPIAIJ(Mat A)
1513 {
1514   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1515   int        ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNC__
1523 #define __FUNC__ "MatEqual_MPIAIJ"
1524 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1525 {
1526   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1527   Mat        a, b, c, d;
1528   PetscTruth flg;
1529   int        ierr;
1530 
1531   PetscFunctionBegin;
1532   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1533   a = matA->A; b = matA->B;
1534   c = matB->A; d = matB->B;
1535 
1536   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1537   if (flg == PETSC_TRUE) {
1538     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1539   }
1540   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNC__
1545 #define __FUNC__ "MatCopy_MPIAIJ"
1546 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1547 {
1548   int        ierr;
1549   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1550   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1551 
1552   PetscFunctionBegin;
1553   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1554     /* because of the column compression in the off-processor part of the matrix a->B,
1555        the number of columns in a->B and b->B may be different, hence we cannot call
1556        MatCopy() directly on the two parts. If need be, a more efficient copy than
1557        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1558        then copying the submatrices */
1559     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1560   } else {
1561     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1562     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1568 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1569 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1570 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1571 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1572 
1573 /* -------------------------------------------------------------------*/
1574 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1575        MatGetRow_MPIAIJ,
1576        MatRestoreRow_MPIAIJ,
1577        MatMult_MPIAIJ,
1578        MatMultAdd_MPIAIJ,
1579        MatMultTrans_MPIAIJ,
1580        MatMultTransAdd_MPIAIJ,
1581        0,
1582        0,
1583        0,
1584        0,
1585        0,
1586        0,
1587        MatRelax_MPIAIJ,
1588        MatTranspose_MPIAIJ,
1589        MatGetInfo_MPIAIJ,
1590        MatEqual_MPIAIJ,
1591        MatGetDiagonal_MPIAIJ,
1592        MatDiagonalScale_MPIAIJ,
1593        MatNorm_MPIAIJ,
1594        MatAssemblyBegin_MPIAIJ,
1595        MatAssemblyEnd_MPIAIJ,
1596        0,
1597        MatSetOption_MPIAIJ,
1598        MatZeroEntries_MPIAIJ,
1599        MatZeroRows_MPIAIJ,
1600        0,
1601        0,
1602        0,
1603        0,
1604        MatGetSize_MPIAIJ,
1605        MatGetLocalSize_MPIAIJ,
1606        MatGetOwnershipRange_MPIAIJ,
1607        0,
1608        0,
1609        0,
1610        0,
1611        MatDuplicate_MPIAIJ,
1612        0,
1613        0,
1614        0,
1615        0,
1616        0,
1617        MatGetSubMatrices_MPIAIJ,
1618        MatIncreaseOverlap_MPIAIJ,
1619        MatGetValues_MPIAIJ,
1620        MatCopy_MPIAIJ,
1621        MatPrintHelp_MPIAIJ,
1622        MatScale_MPIAIJ,
1623        0,
1624        0,
1625        0,
1626        MatGetBlockSize_MPIAIJ,
1627        0,
1628        0,
1629        0,
1630        0,
1631        MatFDColoringCreate_MPIAIJ,
1632        0,
1633        MatSetUnfactored_MPIAIJ,
1634        0,
1635        0,
1636        MatGetSubMatrix_MPIAIJ,
1637        0,
1638        0,
1639        MatGetMaps_Petsc};
1640 
1641 /* ----------------------------------------------------------------------------------------*/
1642 
1643 EXTERN_C_BEGIN
1644 #undef __FUNC__
1645 #define __FUNC__ "MatStoreValues_MPIAIJ"
1646 int MatStoreValues_MPIAIJ(Mat mat)
1647 {
1648   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1649   int        ierr;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1653   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 EXTERN_C_END
1657 
1658 EXTERN_C_BEGIN
1659 #undef __FUNC__
1660 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1661 int MatRetrieveValues_MPIAIJ(Mat mat)
1662 {
1663   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1664   int        ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1668   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 EXTERN_C_END
1672 
1673 #undef __FUNC__
1674 #define __FUNC__ "MatCreateMPIAIJ"
1675 /*@C
1676    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1677    (the default parallel PETSc format).  For good matrix assembly performance
1678    the user should preallocate the matrix storage by setting the parameters
1679    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1680    performance can be increased by more than a factor of 50.
1681 
1682    Collective on MPI_Comm
1683 
1684    Input Parameters:
1685 +  comm - MPI communicator
1686 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1687            This value should be the same as the local size used in creating the
1688            y vector for the matrix-vector product y = Ax.
1689 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1690        This value should be the same as the local size used in creating the
1691        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1692 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1693 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1694 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1695            (same for all local rows)
1696 .  d_nnz - array containing the number of nonzeros in the various rows of the
1697            diagonal portion of the local submatrix (possibly different for each row)
1698            or PETSC_NULL. You must leave room for the diagonal entry even if
1699            it is zero.
1700 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1701            submatrix (same for all local rows).
1702 -  o_nnz - array containing the number of nonzeros in the various rows of the
1703            off-diagonal portion of the local submatrix (possibly different for
1704            each row) or PETSC_NULL.
1705 
1706    Output Parameter:
1707 .  A - the matrix
1708 
1709    Notes:
1710    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1711    then it must be used on all processors that share the object for that argument.
1712 
1713    The AIJ format (also called the Yale sparse matrix format or
1714    compressed row storage) is fully compatible with standard Fortran 77
1715    storage.  That is, the stored row and column indices can begin at
1716    either one (as in Fortran) or zero.  See the users manual for details.
1717 
1718    The user MUST specify either the local or global matrix dimensions
1719    (possibly both).
1720 
1721    By default, this format uses inodes (identical nodes) when possible.
1722    We search for consecutive rows with the same nonzero structure, thereby
1723    reusing matrix information to achieve increased efficiency.
1724 
1725    Options Database Keys:
1726 +  -mat_aij_no_inode  - Do not use inodes
1727 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1728 .  -mat_aij_oneindex - Internally use indexing starting at 1
1729         rather than 0.  Note that when calling MatSetValues(),
1730         the user still MUST index entries starting at 0!
1731 -   -mat_mpi - use the parallel matrix data structures even on one processor
1732                (defaults to using SeqAIJ format on one processor)
1735 
1736 
1737    Storage Information:
1738    For a square global matrix we define each processor's diagonal portion
1739    to be its local rows and the corresponding columns (a square submatrix);
1740    each processor's off-diagonal portion encompasses the remainder of the
1741    local matrix (a rectangular submatrix).
1742 
1743    The user can specify preallocated storage for the diagonal part of
1744    the local submatrix with either d_nz or d_nnz (not both).  Set
1745    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1746    memory allocation.  Likewise, specify preallocated storage for the
1747    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1748 
1749    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1750    the figure below we depict these three local rows and all columns (0-11).
1751 
1752 .vb
1753              0 1 2 3 4 5 6 7 8 9 10 11
1754             -------------------
1755      row 3  |  o o o d d d o o o o o o
1756      row 4  |  o o o d d d o o o o o o
1757      row 5  |  o o o d d d o o o o o o
1758             -------------------
1759 .ve
1760 
1761    Thus, any entries in the d locations are stored in the d (diagonal)
1762    submatrix, and any entries in the o locations are stored in the
1763    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1764    stored simply in the MATSEQAIJ format for compressed row storage.
1765 
1766    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1767    and o_nz should indicate the number of nonzeros per row in the o matrix.
1768    In general, for PDE problems in which most nonzeros are near the diagonal,
1769    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1770    or you will get TERRIBLE performance; see the users' manual chapter on
1771    matrices.
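
   Example usage (a minimal sketch; the local sizes and preallocation counts below
   are illustrative only and not tied to any particular problem):

.vb
      Mat A;
      int m = 3, n = 3, ierr;

      /* each processor owns 3 rows and 3 "diagonal" columns; as a rough
         (illustrative) estimate, preallocate 3 nonzeros per row in the diagonal
         block and 1 per row in the off-diagonal block */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                             3,PETSC_NULL,1,PETSC_NULL,&A);
      /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
      ierr = MatDestroy(A);
.ve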
1772 
1773 .keywords: matrix, aij, compressed row, sparse, parallel
1774 
1775 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1776 @*/
1777 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1778 {
1779   Mat          B;
1780   Mat_MPIAIJ   *b;
1781   int          ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0;
1782 
1783   PetscFunctionBegin;
1784   MPI_Comm_size(comm,&size);
1785   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1786   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1787   if (!flag1 && !flag2 && size == 1) {
1788     if (M == PETSC_DECIDE) M = m;
1789     if (N == PETSC_DECIDE) N = n;
1790     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1791     PetscFunctionReturn(0);
1792   }
1793 
1794   *A = 0;
1795   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1796   PLogObjectCreate(B);
1797   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1798   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1799   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1800   B->ops->destroy    = MatDestroy_MPIAIJ;
1801   B->ops->view       = MatView_MPIAIJ;
1802   B->factor          = 0;
1803   B->assembled       = PETSC_FALSE;
1804   B->mapping         = 0;
1805 
1806   B->insertmode      = NOT_SET_VALUES;
1807   b->size            = size;
1808   MPI_Comm_rank(comm,&b->rank);
1809 
1810   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1811     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1812   }
1813 
1814   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1815     work[0] = m; work[1] = n;
1816     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1817     if (M == PETSC_DECIDE) M = sum[0];
1818     if (N == PETSC_DECIDE) N = sum[1];
1819   }
1820   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1821   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1822   b->m = m; B->m = m;
1823   b->n = n; B->n = n;
1824   b->N = N; B->N = N;
1825   b->M = M; B->M = M;
1826 
1827   /* the information in the maps duplicates the information computed below, eventually
1828      we should remove the duplicate information that is not contained in the maps */
1829   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1830   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1831 
1832   /* build local table of row and column ownerships */
1833   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1834   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1835   b->cowners = b->rowners + b->size + 2;
1836   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1837   b->rowners[0] = 0;
1838   for ( i=2; i<=b->size; i++ ) {
1839     b->rowners[i] += b->rowners[i-1];
1840   }
1841   b->rstart = b->rowners[b->rank];
1842   b->rend   = b->rowners[b->rank+1];
1843   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1844   b->cowners[0] = 0;
1845   for ( i=2; i<=b->size; i++ ) {
1846     b->cowners[i] += b->cowners[i-1];
1847   }
1848   b->cstart = b->cowners[b->rank];
1849   b->cend   = b->cowners[b->rank+1];
1850 
1851   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1852   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1853   PLogObjectParent(B,b->A);
1854   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1855   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1856   PLogObjectParent(B,b->B);
1857 
1858   /* build cache (stash) for matrix entries destined for other processors */
1859   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1860   b->donotstash  = 0;
1861   b->colmap      = 0;
1862   b->garray      = 0;
1863   b->roworiented = 1;
1864 
1865   /* stuff used for matrix vector multiply */
1866   b->lvec      = 0;
1867   b->Mvctx     = 0;
1868 
1869   /* stuff for MatGetRow() */
1870   b->rowindices   = 0;
1871   b->rowvalues    = 0;
1872   b->getrowactive = PETSC_FALSE;
1873 
1874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1875                                      "MatStoreValues_MPIAIJ",
1876                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1878                                      "MatRetrieveValues_MPIAIJ",
1879                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1880   *A = B;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNC__
1885 #define __FUNC__ "MatDuplicate_MPIAIJ"
1886 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1887 {
1888   Mat        mat;
1889   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1890   int        ierr, len=0, flg;
1891 
1892   PetscFunctionBegin;
1893   *newmat       = 0;
1894   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1895   PLogObjectCreate(mat);
1896   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1897   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1898   mat->ops->destroy    = MatDestroy_MPIAIJ;
1899   mat->ops->view       = MatView_MPIAIJ;
1900   mat->factor     = matin->factor;
1901   mat->assembled  = PETSC_TRUE;
1902 
1903   a->m = mat->m   = oldmat->m;
1904   a->n = mat->n   = oldmat->n;
1905   a->M = mat->M   = oldmat->M;
1906   a->N = mat->N   = oldmat->N;
1907 
1908   a->rstart       = oldmat->rstart;
1909   a->rend         = oldmat->rend;
1910   a->cstart       = oldmat->cstart;
1911   a->cend         = oldmat->cend;
1912   a->size         = oldmat->size;
1913   a->rank         = oldmat->rank;
1914   mat->insertmode = NOT_SET_VALUES;
1915   a->rowvalues    = 0;
1916   a->getrowactive = PETSC_FALSE;
1917 
1918   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1919   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1920   a->cowners = a->rowners + a->size + 2;
1921   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1922   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1923   if (oldmat->colmap) {
1924     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1925     PLogObjectMemory(mat,(a->N)*sizeof(int));
1926     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1927   } else a->colmap = 0;
1928   if (oldmat->garray) {
1929     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1930     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1931     PLogObjectMemory(mat,len*sizeof(int));
1932     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1933   } else a->garray = 0;
1934 
1935   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1936   PLogObjectParent(mat,a->lvec);
1937   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1938   PLogObjectParent(mat,a->Mvctx);
1939   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1940   PLogObjectParent(mat,a->A);
1941   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
1942   PLogObjectParent(mat,a->B);
1943   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1944   if (flg) {
1945     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1946   }
1947   *newmat = mat;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #include "sys.h"
1952 
1953 #undef __FUNC__
1954 #define __FUNC__ "MatLoad_MPIAIJ"
1955 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1956 {
1957   Mat          A;
1958   Scalar       *vals,*svals;
1959   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1960   MPI_Status   status;
1961   int          i, nz, ierr, j,rstart, rend, fd;
1962   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1963   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1964   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1965 
1966   PetscFunctionBegin;
1967   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1968   if (!rank) {
1969     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1970     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1971     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1972     if (header[3] < 0) {
1973       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1974     }
1975   }
1976 
1977   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1978   M = header[1]; N = header[2];
1979   /* determine ownership of all rows */
1980   m = M/size + ((M % size) > rank);
1981   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1982   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1983   rowners[0] = 0;
1984   for ( i=2; i<=size; i++ ) {
1985     rowners[i] += rowners[i-1];
1986   }
1987   rstart = rowners[rank];
1988   rend   = rowners[rank+1];
1989 
1990   /* distribute row lengths to all processors */
1991   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1992   offlens = ourlens + (rend-rstart);
1993   if (!rank) {
1994     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1995     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1996     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1997     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1998     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1999     PetscFree(sndcounts);
2000   } else {
2001     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2002   }
2003 
2004   if (!rank) {
2005     /* calculate the number of nonzeros on each processor */
2006     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2007     PetscMemzero(procsnz,size*sizeof(int));
2008     for ( i=0; i<size; i++ ) {
2009       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2010         procsnz[i] += rowlengths[j];
2011       }
2012     }
2013     PetscFree(rowlengths);
2014 
2015     /* determine max buffer needed and allocate it */
2016     maxnz = 0;
2017     for ( i=0; i<size; i++ ) {
2018       maxnz = PetscMax(maxnz,procsnz[i]);
2019     }
2020     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2021 
2022     /* read in my part of the matrix column indices  */
2023     nz     = procsnz[0];
2024     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2025     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2026 
2027     /* read in everyone else's part and ship it off */
2028     for ( i=1; i<size; i++ ) {
2029       nz   = procsnz[i];
2030       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2031       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2032     }
2033     PetscFree(cols);
2034   } else {
2035     /* determine buffer space needed for message */
2036     nz = 0;
2037     for ( i=0; i<m; i++ ) {
2038       nz += ourlens[i];
2039     }
2040     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2041 
2042     /* receive message of column indices */
2043     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2044     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2045     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2046   }
2047 
2048   /* determine column ownership if matrix is not square */
2049   if (N != M) {
2050     n      = N/size + ((N % size) > rank);
2051     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2052     cstart = cend - n;
2053   } else {
2054     cstart = rstart;
2055     cend   = rend;
2056     n      = cend - cstart;
2057   }
2058 
2059   /* loop over local rows, determining number of off diagonal entries */
2060   PetscMemzero(offlens,m*sizeof(int));
2061   jj = 0;
2062   for ( i=0; i<m; i++ ) {
2063     for ( j=0; j<ourlens[i]; j++ ) {
2064       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2065       jj++;
2066     }
2067   }
2068 
2069   /* create our matrix */
2070   for ( i=0; i<m; i++ ) {
2071     ourlens[i] -= offlens[i];
2072   }
2073   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2074   A = *newmat;
2075   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
2076   for ( i=0; i<m; i++ ) {
2077     ourlens[i] += offlens[i];
2078   }
2079 
2080   if (!rank) {
2081     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2082 
2083     /* read in my part of the matrix numerical values  */
2084     nz   = procsnz[0];
2085     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2086 
2087     /* insert into matrix */
2088     jj      = rstart;
2089     smycols = mycols;
2090     svals   = vals;
2091     for ( i=0; i<m; i++ ) {
2092       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2093       smycols += ourlens[i];
2094       svals   += ourlens[i];
2095       jj++;
2096     }
2097 
2098     /* read in other processors and ship out */
2099     for ( i=1; i<size; i++ ) {
2100       nz   = procsnz[i];
2101       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2102       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2103     }
2104     PetscFree(procsnz);
2105   } else {
2106     /* receive numeric values */
2107     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2108 
2109     /* receive message of values */
2110     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2111     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2112     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2113 
2114     /* insert into matrix */
2115     jj      = rstart;
2116     smycols = mycols;
2117     svals   = vals;
2118     for ( i=0; i<m; i++ ) {
2119       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2120       smycols += ourlens[i];
2121       svals   += ourlens[i];
2122       jj++;
2123     }
2124   }
2125   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2126 
2127   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2128   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNC__
2133 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2134 /*
2135     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2136   on each processor, and then the final result built by concatenating the local
2137   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2138 */
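/*
   Call-pattern sketch (hypothetical caller code; isrow/iscol are whatever index
   sets the caller has built, and csize is left to PETSC_DECIDE here):

      ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
      ...
      ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);

   On the MAT_REUSE_MATRIX call the sequential "SubMatrix" composed with the result
   at the end of this routine is queried and refilled instead of being rebuilt.
*/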
2139 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
2140 {
2141   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2142   Mat        *local,M, Mreuse;
2143   Scalar     *vwork,*aa;
2144   MPI_Comm   comm = mat->comm;
2145   Mat_SeqAIJ *aij;
2146   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2147 
2148   PetscFunctionBegin;
2149   MPI_Comm_rank(comm,&rank);
2150   MPI_Comm_size(comm,&size);
2151 
2152   if (call ==  MAT_REUSE_MATRIX) {
2153     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2154     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2155     local = &Mreuse;
2156     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2157   } else {
2158     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2159     Mreuse = *local;
2160     PetscFree(local);
2161   }
2162 
2163   /*
2164       m - number of local rows
2165       n - number of columns (same on all processors)
2166       rstart - first row in new global matrix generated
2167   */
2168   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2169   if (call == MAT_INITIAL_MATRIX) {
2170     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2171     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2172     ii  = aij->i;
2173     jj  = aij->j;
2174 
2175     /*
2176         Determine the number of non-zeros in the diagonal and off-diagonal
2177         portions of the matrix in order to do correct preallocation
2178     */
2179 
2180     /* first get start and end of "diagonal" columns */
2181     if (csize == PETSC_DECIDE) {
2182       nlocal = n/size + ((n % size) > rank);
2183     } else {
2184       nlocal = csize;
2185     }
2186     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2187     rstart = rend - nlocal;
2188     if (rank == size - 1 && rend != n) {
2189       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2190     }
2191 
2192     /* next, compute all the lengths */
2193     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2194     olens = dlens + m;
2195     for ( i=0; i<m; i++ ) {
2196       jend = ii[i+1] - ii[i];
2197       olen = 0;
2198       dlen = 0;
2199       for ( j=0; j<jend; j++ ) {
2200         if ( *jj < rstart || *jj >= rend) olen++;
2201         else dlen++;
2202         jj++;
2203       }
2204       olens[i] = olen;
2205       dlens[i] = dlen;
2206     }
2207     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2208     PetscFree(dlens);
2209   } else {
2210     int ml,nl;
2211 
2212     M = *newmat;
2213     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2214     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2215     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2216     /*
2217          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2218        rather than the slower MatSetValues().
2219     */
2220     M->was_assembled = PETSC_TRUE;
2221     M->assembled     = PETSC_FALSE;
2222   }
2223   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2224   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2225   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2226   ii  = aij->i;
2227   jj  = aij->j;
2228   aa  = aij->a;
2229   for (i=0; i<m; i++) {
2230     row   = rstart + i;
2231     nz    = ii[i+1] - ii[i];
2232     cwork = jj;     jj += nz;
2233     vwork = aa;     aa += nz;
2234     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2235   }
2236 
2237   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2238   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2239   *newmat = M;
2240 
2241   /* save submatrix used in processor for next request */
2242   if (call ==  MAT_INITIAL_MATRIX) {
2243     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2244     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2245   }
2246 
2247   PetscFunctionReturn(0);
2248 }
2249