xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 74c49fae86232ee8d4cedab0aa791f2687f7bef6)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.262 1998/10/09 19:22:12 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way since the
14 length of colmap equals the global matrix length.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
/* number of extra nonzero slots added to a row when it must be enlarged */
#define CHUNKSIZE   15
/*
   Inlined insertion of one (row,col,value) pair into the "diagonal"
   sequential block A of the parallel matrix.  It relies on the scratch
   variables declared in MatSetValues_MPIAIJ() (rp,ap,rmax,nrow,_i,N,col1,
   low,high,t,ii,nonew,shift and the cached ai/aj/aa/aimax/ailen arrays).
   If the row has no free slot the whole matrix storage is reallocated with
   CHUNKSIZE extra entries for this row.

   NOTE(review): the bisection below narrows [low,high) but the subsequent
   linear scan starts at _i=0, so the bisection result is never used; it
   also compares rp[t] against col rather than col1 (= col - shift).
   Harmless as written -- confirm before reusing the search result.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
      for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
 \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
105 
/*
   Inlined insertion of one (row,col,value) pair into the "off-diagonal"
   sequential block B of the parallel matrix.  Mirror image of
   MatSetValues_SeqAIJ_A_Private(), using the cached bi/bj/ba/bimax/bilen
   arrays; see that macro for details.

   NOTE(review): as in the A variant, the bisection result (low,high) is
   unused by the linear scan that follows, and it compares rp[t] against
   col rather than col1 -- confirm before reusing the search result.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
       for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
 \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  /*
     Insert or add (per addv) the m-by-n dense block v at global rows im[]
     and columns in[].  Locally owned rows are written directly into the
     "diagonal" block aij->A or the "off-diagonal" block aij->B through the
     inlined MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by
     other processors are cached in the stash and communicated during
     assembly.  v is read row-oriented or column-oriented per
     aij->roworiented.
  */
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  /* scratch variables shared by the two insertion macros */
  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is locally owned */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls inside the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column falls inside the off-diagonal block B */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores local index + 1; 0 means column not present */
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column: revert B to global numbering */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      /* row owned by another processor: stash for assembly-time shipment */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  /*
     Fetch the m-by-n block of entries (global rows idxm[], columns idxn[])
     into v (row-oriented).  Only locally owned rows are supported; each
     entry is read from the diagonal block A or the off-diagonal block B,
     and entries not stored in B are returned as 0.0.
  */
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lives in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap stores local index + 1; 0 means column not present */
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
298 
299 #undef __FUNC__
300 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  /*
     Start parallel assembly: ship every stashed off-processor
     (row,col,value) triple to its owning processor.  Each triple is packed
     as three Scalars (indices are stored as Scalars so the whole message is
     one MPIU_SCALAR stream).  The nonblocking sends/receives posted here
     are completed in MatAssemblyEnd_MPIAIJ(), which reads the buffers and
     counts saved into aij at the bottom of this routine.
  */
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    /* nothing was stashed, so there is nothing to communicate */
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    /* pack (row,col,value) as three consecutive Scalars */
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  /* starts[] was advanced by the packing loop above; rebuild it */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);
  PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* save communication state for MatAssemblyEnd_MPIAIJ() */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
405 extern int MatSetUpMultiply_MPIAIJ(Mat);
406 
407 #undef __FUNC__
408 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  /*
     Complete parallel assembly: receive the (row,col,value) triples posted
     in MatAssemblyBegin_MPIAIJ(), insert them locally, wait on outstanding
     sends, assemble both sequential blocks, and (on the first final
     assembly) build the scatter context used by the matrix-vector products.
  */
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    /* three Scalars (row,col,value) per stashed entry */
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          /* colmap stores local index + 1; 0 means column not present */
          col = aij->colmap[col] - 1;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal column: revert B to global numbering */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (aij->recv_waits) PetscFree(aij->recv_waits);
  if (aij->rvalues)    PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (aij->send_waits) PetscFree(aij->send_waits);
  if (aij->svalues)    PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* cached row storage from MatGetRow() is now stale */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
487 
488 #undef __FUNC__
489 #define __FUNC__ "MatZeroEntries_MPIAIJ"
490 int MatZeroEntries_MPIAIJ(Mat A)
491 {
492   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
493   int        ierr;
494 
495   PetscFunctionBegin;
496   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
497   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNC__
502 #define __FUNC__ "MatZeroRows_MPIAIJ"
503 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
504 {
505   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
506   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
507   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
508   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
509   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
510   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
511   MPI_Comm       comm = A->comm;
512   MPI_Request    *send_waits,*recv_waits;
513   MPI_Status     recv_status,*send_status;
514   IS             istmp;
515   PetscTruth     localdiag;
516 
517   PetscFunctionBegin;
518   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
519   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
520 
521   /*  first count number of contributors to each processor */
522   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
523   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
524   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
525   for ( i=0; i<N; i++ ) {
526     idx = rows[i];
527     found = 0;
528     for ( j=0; j<size; j++ ) {
529       if (idx >= owners[j] && idx < owners[j+1]) {
530         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
531       }
532     }
533     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
534   }
535   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
536 
537   /* inform other processors of number of messages and max length*/
538   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
539   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
540   nrecvs = work[rank];
541   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
542   nmax = work[rank];
543   PetscFree(work);
544 
545   /* post receives:   */
546   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
547   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
548   for ( i=0; i<nrecvs; i++ ) {
549     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
550   }
551 
552   /* do sends:
553       1) starts[i] gives the starting index in svalues for stuff going to
554          the ith processor
555   */
556   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
557   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
558   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
559   starts[0] = 0;
560   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
561   for ( i=0; i<N; i++ ) {
562     svalues[starts[owner[i]]++] = rows[i];
563   }
564   ISRestoreIndices(is,&rows);
565 
566   starts[0] = 0;
567   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
568   count = 0;
569   for ( i=0; i<size; i++ ) {
570     if (procs[i]) {
571       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
572     }
573   }
574   PetscFree(starts);
575 
576   base = owners[rank];
577 
578   /*  wait on receives */
579   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
580   source = lens + nrecvs;
581   count  = nrecvs; slen = 0;
582   while (count) {
583     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
584     /* unpack receives into our local space */
585     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
586     source[imdex]  = recv_status.MPI_SOURCE;
587     lens[imdex]  = n;
588     slen += n;
589     count--;
590   }
591   PetscFree(recv_waits);
592 
593   /* move the data into the send scatter */
594   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
595   count = 0;
596   for ( i=0; i<nrecvs; i++ ) {
597     values = rvalues + i*nmax;
598     for ( j=0; j<lens[i]; j++ ) {
599       lrows[count++] = values[j] - base;
600     }
601   }
602   PetscFree(rvalues); PetscFree(lens);
603   PetscFree(owner); PetscFree(nprocs);
604 
605   /* actually zap the local rows */
606   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
607   PLogObjectParent(A,istmp);
608 
609   /*
610         Zero the required rows. If the "diagonal block" of the matrix
611      is square and the user wishes to set the diagonal we use seperate
612      code so that MatSetValues() is not called for each diagonal allocating
613      new memory, thus calling lots of mallocs and slowing things down.
614 
615        Contributed by: Mathew Knepley
616   */
617   localdiag = PETSC_FALSE;
618   if (diag && (l->A->M == l->A->N)) {
619     localdiag = PETSC_TRUE;
620     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
621   } else {
622     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
623   }
624   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
625   ierr = ISDestroy(istmp); CHKERRQ(ierr);
626 
627   if (diag && (localdiag == PETSC_FALSE)) {
628     for ( i = 0; i < slen; i++ ) {
629       row = lrows[i] + rstart;
630       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
631     }
632     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
633     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
634   }
635 
636   if (diag) {
637     for ( i = 0; i < slen; i++ ) {
638       row = lrows[i] + rstart;
639       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
640     }
641     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
642     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
643   }
644   PetscFree(lrows);
645 
646   /* wait on sends */
647   if (nsends) {
648     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
649     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
650     PetscFree(send_status);
651   }
652   PetscFree(send_waits); PetscFree(svalues);
653 
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatMult_MPIAIJ"
659 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
660 {
661   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
662   int        ierr,nt;
663 
664   PetscFunctionBegin;
665   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
666   if (nt != a->n) {
667     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
668   }
669   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
670   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
671   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
672   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNC__
677 #define __FUNC__ "MatMultAdd_MPIAIJ"
678 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
679 {
680   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
681   int        ierr;
682 
683   PetscFunctionBegin;
684   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
685   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
686   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
687   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNC__
692 #define __FUNC__ "MatMultTrans_MPIAIJ"
693 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
694 {
695   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696   int        ierr;
697 
698   PetscFunctionBegin;
699   /* do nondiagonal part */
700   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
701   /* send it on its way */
702   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
703   /* do local part */
704   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
705   /* receive remote parts: note this assumes the values are not actually */
706   /* inserted in yy until the next line, which is true for my implementation*/
707   /* but is not perhaps always true. */
708   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
714 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
715 {
716   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
717   int        ierr;
718 
719   PetscFunctionBegin;
720   /* do nondiagonal part */
721   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
722   /* send it on its way */
723   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
724   /* do local part */
725   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
726   /* receive remote parts: note this assumes the values are not actually */
727   /* inserted in yy until the next line, which is true for my implementation*/
728   /* but is not perhaps always true. */
729   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 /*
734   This only works correctly for square matrices where the subblock A->A is the
735    diagonal block
736 */
737 #undef __FUNC__
738 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
739 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
740 {
741   int        ierr;
742   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
743 
744   PetscFunctionBegin;
745   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
746   if (a->rstart != a->cstart || a->rend != a->cend) {
747     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
748   }
749   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNC__
754 #define __FUNC__ "MatScale_MPIAIJ"
755 int MatScale_MPIAIJ(Scalar *aa,Mat A)
756 {
757   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
758   int        ierr;
759 
760   PetscFunctionBegin;
761   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
762   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  /*
     Release all storage owned by the parallel matrix: mappings, maps,
     stash, sequential blocks, scatter context and the header itself.
  */
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  /* reference counted: only the last release actually destroys */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
805 
806 #undef __FUNC__
807 #define __FUNC__ "MatView_MPIAIJ_Binary"
808 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
809 {
810   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
811   int         ierr;
812 
813   PetscFunctionBegin;
814   if (aij->size == 1) {
815     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
816   }
817   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNC__
822 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
823 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
824 {
825   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
826   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
827   int         ierr, format,shift = C->indexshift,rank;
828   FILE        *fd;
829   ViewerType  vtype;
830 
831   PetscFunctionBegin;
832   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
833   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
834     ierr = ViewerGetFormat(viewer,&format);
835     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
836       MatInfo info;
837       int     flg;
838       MPI_Comm_rank(mat->comm,&rank);
839       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
840       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
841       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
842       PetscSequentialPhaseBegin(mat->comm,1);
843       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
844          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
845       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
846          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
847       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
848       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
849       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
850       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
851       fflush(fd);
852       PetscSequentialPhaseEnd(mat->comm,1);
853       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
854       PetscFunctionReturn(0);
855     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
856       PetscFunctionReturn(0);
857     }
858   }
859 
860   if (vtype == DRAW_VIEWER) {
861     Draw       draw;
862     PetscTruth isnull;
863     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
864     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
865   }
866 
867   if (vtype == ASCII_FILE_VIEWER) {
868     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
869     PetscSequentialPhaseBegin(mat->comm,1);
870     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
871            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
872            aij->cend);
873     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
874     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
875     fflush(fd);
876     PetscSequentialPhaseEnd(mat->comm,1);
877   } else {
878     int size = aij->size;
879     rank = aij->rank;
880     if (size == 1) {
881       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
882     } else {
883       /* assemble the entire matrix onto first processor. */
884       Mat         A;
885       Mat_SeqAIJ *Aloc;
886       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
887       Scalar      *a;
888 
889       if (!rank) {
890         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
891       } else {
892         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
893       }
894       PLogObjectParent(mat,A);
895 
896       /* copy over the A part */
897       Aloc = (Mat_SeqAIJ*) aij->A->data;
898       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
899       row = aij->rstart;
900       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
901       for ( i=0; i<m; i++ ) {
902         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
903         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
904       }
905       aj = Aloc->j;
906       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
907 
908       /* copy over the B part */
909       Aloc = (Mat_SeqAIJ*) aij->B->data;
910       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
911       row = aij->rstart;
912       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
913       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
914       for ( i=0; i<m; i++ ) {
915         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
916         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
917       }
918       PetscFree(ct);
919       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
920       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
921       /*
922          Everyone has to call to draw the matrix since the graphics waits are
923          synchronized across all processors that share the Draw object
924       */
925       if (!rank || vtype == DRAW_VIEWER) {
926         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
927       }
928       ierr = MatDestroy(A); CHKERRQ(ierr);
929     }
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNC__
935 #define __FUNC__ "MatView_MPIAIJ"
936 int MatView_MPIAIJ(Mat mat,Viewer viewer)
937 {
938   int         ierr;
939   ViewerType  vtype;
940 
941   PetscFunctionBegin;
942   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
943   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
944       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
945     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
946   } else if (vtype == BINARY_FILE_VIEWER) {
947     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
948   } else {
949     SETERRQ(1,1,"Viewer type not supported by PETSc object");
950   }
951   PetscFunctionReturn(0);
952 }
953 
/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration.
     3) color updating, with outer values updated between colors.
*/
961 #undef __FUNC__
962 #define __FUNC__ "MatRelax_MPIAIJ"
963 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
964                            double fshift,int its,Vec xx)
965 {
966   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
967   Mat        AA = mat->A, BB = mat->B;
968   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
969   Scalar     *b,*x,*xs,*ls,d,*v,sum;
970   int        ierr,*idx, *diag;
971   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
972 
973   PetscFunctionBegin;
974   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
975   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
976   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
977   xs = x + shift; /* shift by one for index start of 1 */
978   ls = ls + shift;
979   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
980   diag = A->diag;
981   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
982     if (flag & SOR_ZERO_INITIAL_GUESS) {
983       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
984       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
985       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
986       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
987       PetscFunctionReturn(0);
988     }
989     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
990     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
991     while (its--) {
992       /* go down through the rows */
993       for ( i=0; i<m; i++ ) {
994         n    = A->i[i+1] - A->i[i];
995 	PLogFlops(4*n+3);
996         idx  = A->j + A->i[i] + shift;
997         v    = A->a + A->i[i] + shift;
998         sum  = b[i];
999         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1000         d    = fshift + A->a[diag[i]+shift];
1001         n    = B->i[i+1] - B->i[i];
1002         idx  = B->j + B->i[i] + shift;
1003         v    = B->a + B->i[i] + shift;
1004         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1005         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1006       }
1007       /* come up through the rows */
1008       for ( i=m-1; i>-1; i-- ) {
1009         n    = A->i[i+1] - A->i[i];
1010 	PLogFlops(4*n+3)
1011         idx  = A->j + A->i[i] + shift;
1012         v    = A->a + A->i[i] + shift;
1013         sum  = b[i];
1014         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1015         d    = fshift + A->a[diag[i]+shift];
1016         n    = B->i[i+1] - B->i[i];
1017         idx  = B->j + B->i[i] + shift;
1018         v    = B->a + B->i[i] + shift;
1019         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1020         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1021       }
1022     }
1023   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024     if (flag & SOR_ZERO_INITIAL_GUESS) {
1025       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1026       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1027       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1028       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1029       PetscFunctionReturn(0);
1030     }
1031     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1033     while (its--) {
1034       for ( i=0; i<m; i++ ) {
1035         n    = A->i[i+1] - A->i[i];
1036 	PLogFlops(4*n+3);
1037         idx  = A->j + A->i[i] + shift;
1038         v    = A->a + A->i[i] + shift;
1039         sum  = b[i];
1040         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1041         d    = fshift + A->a[diag[i]+shift];
1042         n    = B->i[i+1] - B->i[i];
1043         idx  = B->j + B->i[i] + shift;
1044         v    = B->a + B->i[i] + shift;
1045         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1046         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1047       }
1048     }
1049   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1050     if (flag & SOR_ZERO_INITIAL_GUESS) {
1051       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1052       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1053       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1054       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1055       PetscFunctionReturn(0);
1056     }
1057     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1058     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059     while (its--) {
1060       for ( i=m-1; i>-1; i-- ) {
1061         n    = A->i[i+1] - A->i[i];
1062 	PLogFlops(4*n+3);
1063         idx  = A->j + A->i[i] + shift;
1064         v    = A->a + A->i[i] + shift;
1065         sum  = b[i];
1066         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1067         d    = fshift + A->a[diag[i]+shift];
1068         n    = B->i[i+1] - B->i[i];
1069         idx  = B->j + B->i[i] + shift;
1070         v    = B->a + B->i[i] + shift;
1071         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1072         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1073       }
1074     }
1075   } else {
1076     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1077   }
1078   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1079   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1080   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNC__
1085 #define __FUNC__ "MatGetInfo_MPIAIJ"
1086 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1087 {
1088   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1089   Mat        A = mat->A, B = mat->B;
1090   int        ierr;
1091   double     isend[5], irecv[5];
1092 
1093   PetscFunctionBegin;
1094   info->block_size     = 1.0;
1095   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1096   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1097   isend[3] = info->memory;  isend[4] = info->mallocs;
1098   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1099   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1100   isend[3] += info->memory;  isend[4] += info->mallocs;
1101   if (flag == MAT_LOCAL) {
1102     info->nz_used      = isend[0];
1103     info->nz_allocated = isend[1];
1104     info->nz_unneeded  = isend[2];
1105     info->memory       = isend[3];
1106     info->mallocs      = isend[4];
1107   } else if (flag == MAT_GLOBAL_MAX) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else if (flag == MAT_GLOBAL_SUM) {
1115     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1116     info->nz_used      = irecv[0];
1117     info->nz_allocated = irecv[1];
1118     info->nz_unneeded  = irecv[2];
1119     info->memory       = irecv[3];
1120     info->mallocs      = irecv[4];
1121   }
1122   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1123   info->fill_ratio_needed = 0;
1124   info->factor_mallocs    = 0;
1125   info->rows_global       = (double)mat->M;
1126   info->columns_global    = (double)mat->N;
1127   info->rows_local        = (double)mat->m;
1128   info->columns_local     = (double)mat->N;
1129 
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNC__
1134 #define __FUNC__ "MatSetOption_MPIAIJ"
1135 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1136 {
1137   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1138 
1139   PetscFunctionBegin;
1140   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1141       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1142       op == MAT_COLUMNS_UNSORTED ||
1143       op == MAT_COLUMNS_SORTED ||
1144       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1145       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1146         MatSetOption(a->A,op);
1147         MatSetOption(a->B,op);
1148   } else if (op == MAT_ROW_ORIENTED) {
1149     a->roworiented = 1;
1150     MatSetOption(a->A,op);
1151     MatSetOption(a->B,op);
1152   } else if (op == MAT_ROWS_SORTED ||
1153              op == MAT_ROWS_UNSORTED ||
1154              op == MAT_SYMMETRIC ||
1155              op == MAT_STRUCTURALLY_SYMMETRIC ||
1156              op == MAT_YES_NEW_DIAGONALS)
1157     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1158   else if (op == MAT_COLUMN_ORIENTED) {
1159     a->roworiented = 0;
1160     MatSetOption(a->A,op);
1161     MatSetOption(a->B,op);
1162   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1163     a->donotstash = 1;
1164   } else if (op == MAT_NO_NEW_DIAGONALS){
1165     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1166   } else {
1167     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ "MatGetSize_MPIAIJ"
1174 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1175 {
1176   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1177 
1178   PetscFunctionBegin;
1179   if (m) *m = mat->M;
1180   if (n) *n = mat->N;
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNC__
1185 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1186 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1187 {
1188   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1189 
1190   PetscFunctionBegin;
1191   if (m) *m = mat->m;
1192   if (n) *n = mat->n;
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNC__
1197 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1198 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1199 {
1200   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1201 
1202   PetscFunctionBegin;
1203   *m = mat->rstart; *n = mat->rend;
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1208 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1209 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   Returns one locally-owned row of the parallel matrix, with global column
   indices, by merging the row fragments held in the diagonal block A
   (local columns) and off-diagonal block B (compressed columns, mapped to
   global through garray).  The merged data is returned in the persistent
   work arrays rowvalues/rowindices, so only one row may be "gotten" at a
   time; MatRestoreRow_MPIAIJ() clears the active flag.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: Scalar values first, int indices appended after */
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* request from A/B only the pieces (values, columns) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column is < cstart,
         i.e. those that come before all of A's columns in the merged row */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1289 
1290 #undef __FUNC__
1291 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1292 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1293 {
1294   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1295 
1296   PetscFunctionBegin;
1297   if (aij->getrowactive == PETSC_FALSE) {
1298     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1299   }
1300   aij->getrowactive = PETSC_FALSE;
1301   PetscFunctionReturn(0);
1302 }
1303 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   Computes a norm of the parallel matrix.  On one process the request is
   delegated to the sequential block; otherwise each process accumulates
   over its A and B blocks and the results are combined with an MPI
   reduction.  Supported: NORM_FROBENIUS, NORM_1 (max column sum; uses an
   O(N) work array per process, N = global columns), NORM_INFINITY (max
   row sum).  The two-norm is not supported in parallel.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over both local blocks, then global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      /* tmp accumulates |a_ij| per *global* column; summed across processes */
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        /* A stores local columns: offset by cstart to get global column */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* B stores compressed columns: map to global through garray */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1378 
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   Transposes a parallel AIJ matrix by inserting the entries of the local
   A and B blocks (columns mapped back to global numbering) into a freshly
   created N x M matrix with rows and columns swapped via MatSetValues().
   With matout == PETSC_NULL an "in-place" transpose is simulated for
   square matrices by destroying the old data and copying the new header
   over the old one (keeping the original ops tables).
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* B has the transposed shape: local sizes swapped as well */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift local column indices to global, in place */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* row of A becomes column of B: note swapped row/col arguments */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* undo the in-place shift so a->A is left unchanged */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* B's compressed column indices are mapped to global through garray */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
1452 
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   Computes mat = diag(ll) * mat * diag(rr) (either vector may be null).
   Left scaling applies to both local blocks directly; right scaling of
   the off-diagonal block needs the off-processor entries of rr, which are
   scattered into aij->lvec while the diagonal block is scaled, overlapping
   communication with computation.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat a = aij->A, b = aij->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, so ll suffices) */
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1485 
1486 
1487 extern int MatPrintHelp_SeqAIJ(Mat);
1488 #undef __FUNC__
1489 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1490 int MatPrintHelp_MPIAIJ(Mat A)
1491 {
1492   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1493   int        ierr;
1494 
1495   PetscFunctionBegin;
1496   if (!a->rank) {
1497     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNC__
1503 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1504 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1505 {
1506   PetscFunctionBegin;
1507   *bs = 1;
1508   PetscFunctionReturn(0);
1509 }
1510 #undef __FUNC__
1511 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1512 int MatSetUnfactored_MPIAIJ(Mat A)
1513 {
1514   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1515   int        ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNC__
1523 #define __FUNC__ "MatEqual_MPIAIJ"
1524 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1525 {
1526   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1527   Mat        a, b, c, d;
1528   PetscTruth flg;
1529   int        ierr;
1530 
1531   PetscFunctionBegin;
1532   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1533   a = matA->A; b = matA->B;
1534   c = matB->A; d = matB->B;
1535 
1536   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1537   if (flg == PETSC_TRUE) {
1538     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1539   }
1540   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNC__
1545 #define __FUNC__ "MatCopy_MPIAIJ"
1546 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1547 {
1548   int        ierr;
1549   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1550   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1551 
1552   PetscFunctionBegin;
1553   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1554     /* because of the column compression in the off-processor part of the matrix a->B,
1555        the number of columns in a->B and b->B may be different, hence we cannot call
1556        the MatCopy() directly on the two parts. If need be, we can provide a more
1557        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1558        then copying the submatrices */
1559     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1560   } else {
1561     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1562     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1568 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1569 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1570 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1571 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1572 
1573 /* -------------------------------------------------------------------*/
/*
   Dispatch table for the MPIAIJ matrix type.  The entries are POSITIONAL
   initializers for struct _MatOps, so their order must match the struct
   declaration exactly; a 0 entry means the operation is not implemented
   for this type.  NOTE(review): consult the struct _MatOps declaration in
   the matrix header before adding, removing, or reordering entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};
1640 
1641 /* ----------------------------------------------------------------------------------------*/
1642 
1643 #undef __FUNC__
1644 #define __FUNC__ "MatStoreValues_MPIAIJ"
1645 int MatStoreValues_MPIAIJ(Mat mat)
1646 {
1647   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1648   int        ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1652   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNC__
1657 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1658 int MatRetrieveValues_MPIAIJ(Mat mat)
1659 {
1660   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1661   int        ierr;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1665   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1666   PetscFunctionReturn(0);
1667 }
1668 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqAIJ format on one processor)

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
             0 1 2 3 4 5 6 7 8 9 10 11
            -------------------
     row 3  |  o o o d d d o o o o o o
     row 4  |  o o o d d d o o o o o o
     row 5  |  o o o d d d o o o o o o
            -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* on one processor, unless overridden by options, fall back to the
     (simpler and faster) sequential AIJ format */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  /* PetscNew() does not clear the allocation; zero it so all fields start defined */
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy    = MatDestroy_MPIAIJ;
  B->ops->view       = MatView_MPIAIJ;
  B->factor          = 0;
  B->assembled       = PETSC_FALSE;
  B->mapping         = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* determine any global sizes left as PETSC_DECIDE by summing the local sizes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* determine local sizes left as PETSC_DECIDE: split evenly, spreading the
     remainder over the lowest-ranked processors */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships:
     rowners[i] is the first global row owned by processor i (prefix sum of
     the gathered local sizes); likewise cowners for columns */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* A holds the local (diagonal-block) columns, B the off-processor columns;
     B is created with the full global column width N and compacted later */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}
1879 
1880 #undef __FUNC__
1881 #define __FUNC__ "MatDuplicate_MPIAIJ"
1882 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1883 {
1884   Mat        mat;
1885   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1886   int        ierr, len=0, flg;
1887 
1888   PetscFunctionBegin;
1889   *newmat       = 0;
1890   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1891   PLogObjectCreate(mat);
1892   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1893   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1894   mat->ops->destroy    = MatDestroy_MPIAIJ;
1895   mat->ops->view       = MatView_MPIAIJ;
1896   mat->factor     = matin->factor;
1897   mat->assembled  = PETSC_TRUE;
1898 
1899   a->m = mat->m   = oldmat->m;
1900   a->n = mat->n   = oldmat->n;
1901   a->M = mat->M   = oldmat->M;
1902   a->N = mat->N   = oldmat->N;
1903 
1904   a->rstart       = oldmat->rstart;
1905   a->rend         = oldmat->rend;
1906   a->cstart       = oldmat->cstart;
1907   a->cend         = oldmat->cend;
1908   a->size         = oldmat->size;
1909   a->rank         = oldmat->rank;
1910   mat->insertmode = NOT_SET_VALUES;
1911   a->rowvalues    = 0;
1912   a->getrowactive = PETSC_FALSE;
1913 
1914   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1915   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1916   a->cowners = a->rowners + a->size + 2;
1917   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1918   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1919   if (oldmat->colmap) {
1920     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1921     PLogObjectMemory(mat,(a->N)*sizeof(int));
1922     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1923   } else a->colmap = 0;
1924   if (oldmat->garray) {
1925     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1926     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1927     PLogObjectMemory(mat,len*sizeof(int));
1928     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1929   } else a->garray = 0;
1930 
1931   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1932   PLogObjectParent(mat,a->lvec);
1933   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1934   PLogObjectParent(mat,a->Mvctx);
1935   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1936   PLogObjectParent(mat,a->A);
1937   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
1938   PLogObjectParent(mat,a->B);
1939   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1940   if (flg) {
1941     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1942   }
1943   *newmat = mat;
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #include "sys.h"
1948 
1949 #undef __FUNC__
1950 #define __FUNC__ "MatLoad_MPIAIJ"
1951 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1952 {
1953   Mat          A;
1954   Scalar       *vals,*svals;
1955   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1956   MPI_Status   status;
1957   int          i, nz, ierr, j,rstart, rend, fd;
1958   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1959   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1960   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1961 
1962   PetscFunctionBegin;
1963   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1964   if (!rank) {
1965     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1966     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1967     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1968     if (header[3] < 0) {
1969       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1970     }
1971   }
1972 
1973   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1974   M = header[1]; N = header[2];
1975   /* determine ownership of all rows */
1976   m = M/size + ((M % size) > rank);
1977   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1978   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1979   rowners[0] = 0;
1980   for ( i=2; i<=size; i++ ) {
1981     rowners[i] += rowners[i-1];
1982   }
1983   rstart = rowners[rank];
1984   rend   = rowners[rank+1];
1985 
1986   /* distribute row lengths to all processors */
1987   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1988   offlens = ourlens + (rend-rstart);
1989   if (!rank) {
1990     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1991     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1992     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1993     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1994     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1995     PetscFree(sndcounts);
1996   } else {
1997     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1998   }
1999 
2000   if (!rank) {
2001     /* calculate the number of nonzeros on each processor */
2002     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2003     PetscMemzero(procsnz,size*sizeof(int));
2004     for ( i=0; i<size; i++ ) {
2005       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2006         procsnz[i] += rowlengths[j];
2007       }
2008     }
2009     PetscFree(rowlengths);
2010 
2011     /* determine max buffer needed and allocate it */
2012     maxnz = 0;
2013     for ( i=0; i<size; i++ ) {
2014       maxnz = PetscMax(maxnz,procsnz[i]);
2015     }
2016     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2017 
2018     /* read in my part of the matrix column indices  */
2019     nz     = procsnz[0];
2020     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2021     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2022 
2023     /* read in every one elses and ship off */
2024     for ( i=1; i<size; i++ ) {
2025       nz   = procsnz[i];
2026       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2027       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2028     }
2029     PetscFree(cols);
2030   } else {
2031     /* determine buffer space needed for message */
2032     nz = 0;
2033     for ( i=0; i<m; i++ ) {
2034       nz += ourlens[i];
2035     }
2036     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2037 
2038     /* receive message of column indices*/
2039     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2040     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2041     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2042   }
2043 
2044   /* determine column ownership if matrix is not square */
2045   if (N != M) {
2046     n      = N/size + ((N % size) > rank);
2047     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2048     cstart = cend - n;
2049   } else {
2050     cstart = rstart;
2051     cend   = rend;
2052   }
2053 
2054   /* loop over local rows, determining number of off diagonal entries */
2055   PetscMemzero(offlens,m*sizeof(int));
2056   jj = 0;
2057   for ( i=0; i<m; i++ ) {
2058     for ( j=0; j<ourlens[i]; j++ ) {
2059       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2060       jj++;
2061     }
2062   }
2063 
2064   /* create our matrix */
2065   for ( i=0; i<m; i++ ) {
2066     ourlens[i] -= offlens[i];
2067   }
2068   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2069   A = *newmat;
2070   MatSetOption(A,MAT_COLUMNS_SORTED);
2071   for ( i=0; i<m; i++ ) {
2072     ourlens[i] += offlens[i];
2073   }
2074 
2075   if (!rank) {
2076     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2077 
2078     /* read in my part of the matrix numerical values  */
2079     nz = procsnz[0];
2080     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2081 
2082     /* insert into matrix */
2083     jj      = rstart;
2084     smycols = mycols;
2085     svals   = vals;
2086     for ( i=0; i<m; i++ ) {
2087       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2088       smycols += ourlens[i];
2089       svals   += ourlens[i];
2090       jj++;
2091     }
2092 
2093     /* read in other processors and ship out */
2094     for ( i=1; i<size; i++ ) {
2095       nz   = procsnz[i];
2096       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2097       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2098     }
2099     PetscFree(procsnz);
2100   } else {
2101     /* receive numeric values */
2102     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2103 
2104     /* receive message of values*/
2105     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2106     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2107     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2108 
2109     /* insert into matrix */
2110     jj      = rstart;
2111     smycols = mycols;
2112     svals   = vals;
2113     for ( i=0; i<m; i++ ) {
2114       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2115       smycols += ourlens[i];
2116       svals   += ourlens[i];
2117       jj++;
2118     }
2119   }
2120   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2121 
2122   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2123   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2124   PetscFunctionReturn(0);
2125 }
2126 
#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    MatGetSubMatrix_MPIAIJ - Extracts the parallel submatrix defined by the
  row index set isrow and column index set iscol.

    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    With call == MAT_INITIAL_MATRIX a new matrix is created and the local
  SeqAIJ copy is cached on it (composed as "SubMatrix") so that a later
  call with MAT_REUSE_MATRIX and the same index sets can refill the same
  matrix without reallocating.

    csize is the number of local columns of the result, or PETSC_DECIDE to
  split the columns evenly.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call ==  MAT_REUSE_MATRIX) {
    /* refill the sequential submatrix cached on newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* walk the CSR arrays of the sequential copy row by row, inserting each
     row into the parallel matrix at its new global row number */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2244