xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 99dcc5326efaeeb96f7fc5515ee10e547e6b62bd)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.273 1999/01/11 16:21:28 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global number of columns.
13 */
14 #undef __FUNC__
15 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 #if defined (USE_CTABLE)
22   int        ierr;
23 #endif
24 
25   PetscFunctionBegin;
26 #if defined (USE_CTABLE)
27   ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr);
28   for ( i=0; i<n; i++ ){
29     ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr);
30   }
31 #else
32   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
33   PLogObjectMemory(mat,aij->N*sizeof(int));
34   PetscMemzero(aij->colmap,aij->N*sizeof(int));
35   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
36 #endif
37   PetscFunctionReturn(0);
38 }
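/*
   Illustration (editorial sketch, not part of the original source): suppose
   the off-diagonal block B on this process references global columns 3, 7
   and 9.  Then

      garray = {3, 7, 9}     maps local off-diagonal column -> global column

   and the routine above builds the inverse map

      colmap[3] = 1, colmap[7] = 2, colmap[9] = 3,   all other entries 0,

   so that "colmap[gcol] - 1" recovers the local column; a result of -1 means
   global column gcol does not appear in B.  The +1/-1 offset lets the zeroed
   entries of colmap double as the "not present" flag.  The CTABLE variant
   stores the same map in a hash table instead of a length-N array.
*/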
39 
40 extern int DisAssemble_MPIAIJ(Mat);
41 
42 #define CHUNKSIZE   15
43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
44 { \
45  \
46     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
47     rmax = aimax[row]; nrow = ailen[row];  \
48     col1 = col - shift; \
49      \
50     low = 0; high = nrow; \
51     while (high-low > 5) { \
52       t = (low+high)/2; \
53       if (rp[t] > col1) high = t; \
54       else             low  = t; \
55     } \
56       for ( _i=low; _i<nrow; _i++ ) { \
57         if (rp[_i] > col1) break; \
58         if (rp[_i] == col1) { \
59           if (addv == ADD_VALUES) ap[_i] += value;   \
60           else                  ap[_i] = value; \
61           goto a_noinsert; \
62         } \
63       }  \
64       if (nonew == 1) goto a_noinsert; \
65       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
66       if (nrow >= rmax) { \
67         /* there is no extra room in row, therefore enlarge */ \
68         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
69         Scalar *new_a; \
70  \
71         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
72  \
73         /* malloc new storage space */ \
74         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
75         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
76         new_j   = (int *) (new_a + new_nz); \
77         new_i   = new_j + new_nz; \
78  \
79         /* copy over old data into new slots */ \
80         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
81         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
82         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
83         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
84         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
85                                                            len*sizeof(int)); \
86         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
87         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
88                                                            len*sizeof(Scalar));  \
89         /* free up old matrix storage */ \
90  \
91         PetscFree(a->a);  \
92         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
93         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
94         a->singlemalloc = 1; \
95  \
96         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
97         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
98         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
99         a->maxnz += CHUNKSIZE; \
100         a->reallocs++; \
101       } \
102       N = nrow++ - 1; a->nz++; \
103       /* shift up all the later entries in this row */ \
104       for ( ii=N; ii>=_i; ii-- ) { \
105         rp[ii+1] = rp[ii]; \
106         ap[ii+1] = ap[ii]; \
107       } \
108       rp[_i] = col1;  \
109       ap[_i] = value;  \
110       a_noinsert: ; \
111       ailen[row] = nrow; \
112 }
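/*
   Layout note on the reallocation above (an explanatory sketch, not library
   code): the enlarged row storage comes from ONE malloc holding the values,
   column indices and row offsets back to back,

      new_a : new_nz Scalars    values
      new_j : new_nz ints       column indices (starts at new_a + new_nz)
      new_i : a->m+1 ints       row offsets    (starts at new_j + new_nz)

   with CHUNKSIZE extra slots opened up in the row being enlarged.  This is
   why a->singlemalloc is set to 1 afterwards: subsequent frees, as in the
   code above, release a->a only rather than three separate arrays.
*/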
113 
114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
115 { \
116  \
117     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
118     rmax = bimax[row]; nrow = bilen[row];  \
119     col1 = col - shift; \
120      \
121     low = 0; high = nrow; \
122     while (high-low > 5) { \
123       t = (low+high)/2; \
124       if (rp[t] > col1) high = t; \
125       else             low  = t; \
126     } \
127       for ( _i=low; _i<nrow; _i++ ) { \
128         if (rp[_i] > col1) break; \
129         if (rp[_i] == col1) { \
130           if (addv == ADD_VALUES) ap[_i] += value;   \
131           else                  ap[_i] = value; \
132           goto b_noinsert; \
133         } \
134       }  \
135       if (nonew == 1) goto b_noinsert; \
136       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
137       if (nrow >= rmax) { \
138         /* there is no extra room in row, therefore enlarge */ \
139         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
140         Scalar *new_a; \
141  \
142         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
143  \
144         /* malloc new storage space */ \
145         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
146         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
147         new_j   = (int *) (new_a + new_nz); \
148         new_i   = new_j + new_nz; \
149  \
150         /* copy over old data into new slots */ \
151         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
152         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
153         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
154         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
155         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
156                                                            len*sizeof(int)); \
157         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
158         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
159                                                            len*sizeof(Scalar));  \
160         /* free up old matrix storage */ \
161  \
162         PetscFree(b->a);  \
163         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = 1; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for ( ii=N; ii>=_i; ii-- ) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
184 
185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
186 #undef __FUNC__
187 #define __FUNC__ "MatSetValues_MPIAIJ"
188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
189 {
190   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
191   Scalar     value;
192   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
193   int        cstart = aij->cstart, cend = aij->cend,row,col;
194   int        roworiented = aij->roworiented;
195 
196   /* Some variables required by the macros */
197   Mat        A = aij->A;
198   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
199   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
200   Scalar     *aa = a->a;
201 
202   Mat        B = aij->B;
203   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
204   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
205   Scalar     *ba = b->a;
206 
207   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
208   int        nonew = a->nonew,shift = a->indexshift;
209   Scalar     *ap;
210 
211   PetscFunctionBegin;
212   for ( i=0; i<m; i++ ) {
213 #if defined(USE_PETSC_BOPT_g)
214     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
215     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
216 #endif
217     if (im[i] >= rstart && im[i] < rend) {
218       row = im[i] - rstart;
219       for ( j=0; j<n; j++ ) {
220         if (in[j] >= cstart && in[j] < cend){
221           col = in[j] - cstart;
222           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         }
226 #if defined(USE_PETSC_BOPT_g)
227         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
228         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
229 #endif
230         else {
231           if (mat->was_assembled) {
232             if (!aij->colmap) {
233               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
234             }
235 #if defined (USE_CTABLE)
236             col = TableFind( aij->colmap, in[j] + 1 ) - 1;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ *) B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     }
256     else {
257       if (roworiented && !aij->donotstash) {
258         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
259       }
260       else {
261         if (!aij->donotstash) {
262           row = im[i];
263           for ( j=0; j<n; j++ ) {
264             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
265           }
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
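/*
   Usage sketch (editorial; assumes the standard PETSc 2.x creation calls,
   which are also used further down in this file).  Values for locally owned
   rows go straight into the A (diagonal) or B (off-diagonal) blocks; rows
   owned by other processes are stashed and shipped during assembly:

      Mat    A;
      int    row = 0, col = 0, ierr;
      Scalar one = 1.0;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                             10,10,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/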
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatGetValues_MPIAIJ"
275 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
276 {
277   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
278   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
279   int        cstart = aij->cstart, cend = aij->cend,row,col;
280 
281   PetscFunctionBegin;
282   for ( i=0; i<m; i++ ) {
283     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
284     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
285     if (idxm[i] >= rstart && idxm[i] < rend) {
286       row = idxm[i] - rstart;
287       for ( j=0; j<n; j++ ) {
288         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
289         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
290         if (idxn[j] >= cstart && idxn[j] < cend){
291           col = idxn[j] - cstart;
292           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
293         } else {
294           if (!aij->colmap) {
295             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
296           }
297 #if defined (USE_CTABLE)
298           col = TableFind( aij->colmap, idxn[j] + 1 ) - 1;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNC__
316 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
317 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
320   MPI_Comm    comm = mat->comm;
321   int         size = aij->size, *owners = aij->rowners;
322   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
323   MPI_Request *send_waits,*recv_waits;
324   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
325   InsertMode  addv;
326   Scalar      *rvalues,*svalues;
327 
328   PetscFunctionBegin;
329   if (aij->donotstash) {
330     aij->svalues    = 0; aij->rvalues    = 0;
331     aij->nsends     = 0; aij->nrecvs     = 0;
332     aij->send_waits = 0; aij->recv_waits = 0;
333     aij->rmax       = 0;
334     PetscFunctionReturn(0);
335   }
336 
337   /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES) */
338   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
339   if (addv == (ADD_VALUES|INSERT_VALUES)) {
340     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
341   }
342   mat->insertmode = addv; /* in case this processor had no cache */
343 
344   /*  first count number of contributors to each processor */
345   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
346   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
347   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
348   for ( i=0; i<aij->stash.n; i++ ) {
349     idx = aij->stash.idx[i];
350     for ( j=0; j<size; j++ ) {
351       if (idx >= owners[j] && idx < owners[j+1]) {
352         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
353       }
354     }
355   }
356   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
357 
358   /* inform other processors of number of messages and max length*/
359   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
360   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
361   nreceives = work[rank];
362   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
363   nmax = work[rank];
364   PetscFree(work);
365 
366   /* post receives:
367        1) each message consists of ordered triples
368      (global row index, global column index, value); the indices are
369      stored as Scalars to simplify the message passing.
370        2) since we don't know how long each individual message is we
371      allocate the largest needed buffer for each receive. Potentially
372      this is a lot of wasted space.
373 
374        This could be done better.
375   */
377   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
378   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
379   for ( i=0; i<nreceives; i++ ) {
380     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
381               comm,recv_waits+i);CHKERRQ(ierr);
382   }
383 
384   /* do sends:
385       1) starts[i] gives the starting index in svalues for stuff going to
386          the ith processor
387   */
388   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
389   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
390   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
391   starts[0]  = 0;
392   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
393   for ( i=0; i<aij->stash.n; i++ ) {
394     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
395     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
396     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
397   }
398   PetscFree(owner);
399   starts[0] = 0;
400   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
401   count = 0;
402   for ( i=0; i<size; i++ ) {
403     if (procs[i]) {
404       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
405     }
406   }
407   PetscFree(starts);
408   PetscFree(nprocs);
409 
410   /* Free cache space */
411   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
412   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
413 
414   aij->svalues    = svalues;    aij->rvalues    = rvalues;
415   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
416   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
417   aij->rmax       = nmax;
418 
419   PetscFunctionReturn(0);
420 }
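/*
   Message layout used above (illustration only): each stashed entry travels
   as a triple of Scalars, so a message carrying k entries for one process
   looks like

      [ row_0, col_0, val_0, row_1, col_1, val_1, ..., row_k-1, col_k-1, val_k-1 ]

   with the integer indices cast to Scalar.  This wastes space on the
   indices but lets a single MPI_Irecv/MPI_Isend of type MPIU_SCALAR move
   everything at once.
*/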
421 extern int MatSetUpMultiply_MPIAIJ(Mat);
422 
423 #undef __FUNC__
424 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
425 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
426 {
427   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
428   MPI_Status  *send_status,recv_status;
429   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
430   int         row,col,other_disassembled;
431   Scalar      *values,val;
432   InsertMode  addv = mat->insertmode;
433 
434   PetscFunctionBegin;
435 
436   /*  wait on receives */
437   while (count) {
438     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
439     /* unpack receives into our local space */
440     values = aij->rvalues + 3*imdex*aij->rmax;
441     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
442     n = n/3;
443     for ( i=0; i<n; i++ ) {
444       row = (int) PetscReal(values[3*i]) - aij->rstart;
445       col = (int) PetscReal(values[3*i+1]);
446       val = values[3*i+2];
447       if (col >= aij->cstart && col < aij->cend) {
448         col -= aij->cstart;
449         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
450       } else {
451         if (mat->was_assembled || mat->assembled) {
452           if (!aij->colmap) {
453             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
454           }
455 #if defined (USE_CTABLE)
456           col = TableFind( aij->colmap, col + 1 ) - 1;
457 #else
458           col = aij->colmap[col] - 1;
459 #endif
460           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
461             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
462             col = (int) PetscReal(values[3*i+1]);
463           }
464         }
465         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
466       }
467     }
468     count--;
469   }
470   if (aij->recv_waits) PetscFree(aij->recv_waits);
471   if (aij->rvalues)    PetscFree(aij->rvalues);
472 
473   /* wait on sends */
474   if (aij->nsends) {
475     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
476     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
477     PetscFree(send_status);
478   }
479   if (aij->send_waits) PetscFree(aij->send_waits);
480   if (aij->svalues)    PetscFree(aij->svalues);
481 
482   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
483   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
484 
485   /* determine if any processor has disassembled; if so we must
486      also disassemble ourselves, in order that we may reassemble. */
487   /*
488      if the nonzero structure of submatrix B cannot change then we know that
489      no processor disassembled, thus we can skip this step
490   */
491   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
492     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
493     if (mat->was_assembled && !other_disassembled) {
494       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
495     }
496   }
497 
498   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
499     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
500   }
501   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
502   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
503 
504   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNC__
509 #define __FUNC__ "MatZeroEntries_MPIAIJ"
510 int MatZeroEntries_MPIAIJ(Mat A)
511 {
512   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
513   int        ierr;
514 
515   PetscFunctionBegin;
516   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
517   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
518   PetscFunctionReturn(0);
519 }
520 
521 #undef __FUNC__
522 #define __FUNC__ "MatZeroRows_MPIAIJ"
523 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
524 {
525   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
526   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
527   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
528   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
529   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
530   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
531   MPI_Comm       comm = A->comm;
532   MPI_Request    *send_waits,*recv_waits;
533   MPI_Status     recv_status,*send_status;
534   IS             istmp;
535   PetscTruth     localdiag;
536 
537   PetscFunctionBegin;
538   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
539   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
540 
541   /*  first count number of contributors to each processor */
542   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
543   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
544   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
545   for ( i=0; i<N; i++ ) {
546     idx = rows[i];
547     found = 0;
548     for ( j=0; j<size; j++ ) {
549       if (idx >= owners[j] && idx < owners[j+1]) {
550         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
551       }
552     }
553     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
554   }
555   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
556 
557   /* inform other processors of number of messages and max length*/
558   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
559   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
560   nrecvs = work[rank];
561   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
562   nmax = work[rank];
563   PetscFree(work);
564 
565   /* post receives:   */
566   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
567   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
568   for ( i=0; i<nrecvs; i++ ) {
569     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
570   }
571 
572   /* do sends:
573       1) starts[i] gives the starting index in svalues for stuff going to
574          the ith processor
575   */
576   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
577   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
578   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
579   starts[0] = 0;
580   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
581   for ( i=0; i<N; i++ ) {
582     svalues[starts[owner[i]]++] = rows[i];
583   }
584   ierr = ISRestoreIndices(is,&rows); CHKERRQ(ierr);
585 
586   starts[0] = 0;
587   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
588   count = 0;
589   for ( i=0; i<size; i++ ) {
590     if (procs[i]) {
591       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
592     }
593   }
594   PetscFree(starts);
595 
596   base = owners[rank];
597 
598   /*  wait on receives */
599   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
600   source = lens + nrecvs;
601   count  = nrecvs; slen = 0;
602   while (count) {
603     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
604     /* unpack receives into our local space */
605     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
606     source[imdex]  = recv_status.MPI_SOURCE;
607     lens[imdex]  = n;
608     slen += n;
609     count--;
610   }
611   PetscFree(recv_waits);
612 
613   /* move the data into the send scatter */
614   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
615   count = 0;
616   for ( i=0; i<nrecvs; i++ ) {
617     values = rvalues + i*nmax;
618     for ( j=0; j<lens[i]; j++ ) {
619       lrows[count++] = values[j] - base;
620     }
621   }
622   PetscFree(rvalues); PetscFree(lens);
623   PetscFree(owner); PetscFree(nprocs);
624 
625   /* actually zap the local rows */
626   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
627   PLogObjectParent(A,istmp);
628 
629   /*
630         Zero the required rows. If the "diagonal block" of the matrix
631      is square and the user wishes to set the diagonal, we use separate
632      code so that MatSetValues() is not called for each diagonal entry,
633      which would allocate new memory and incur many mallocs, slowing things down.
634 
635        Contributed by: Matthew Knepley
636   */
637   localdiag = PETSC_FALSE;
638   if (diag && (l->A->M == l->A->N)) {
639     localdiag = PETSC_TRUE;
640     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
641   } else {
642     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
643   }
644   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
645   ierr = ISDestroy(istmp); CHKERRQ(ierr);
646 
647   if (diag && (localdiag == PETSC_FALSE)) {
648     for ( i = 0; i < slen; i++ ) {
649       row = lrows[i] + rstart;
650       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
651     }
652     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
653     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
654   }
655 
664   PetscFree(lrows);
665 
666   /* wait on sends */
667   if (nsends) {
668     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
669     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
670     PetscFree(send_status);
671   }
672   PetscFree(send_waits); PetscFree(svalues);
673 
674   PetscFunctionReturn(0);
675 }
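/*
   Usage sketch (editorial; standard PETSc 2.x calls): zero two global rows
   of a parallel matrix and place 1.0 on their diagonal.  Each process may
   list any global rows; the routine above routes them to their owners.

      IS     is;
      int    rows[2] = {0, 5}, ierr;
      Scalar diag = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/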
676 
677 #undef __FUNC__
678 #define __FUNC__ "MatMult_MPIAIJ"
679 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
680 {
681   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
682   int        ierr,nt;
683 
684   PetscFunctionBegin;
685   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
686   if (nt != a->n) {
687     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
688   }
689   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
690   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
691   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
692   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
693   PetscFunctionReturn(0);
694 }
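/*
   What MatMult computes (sketch): with this process's rows split into the
   diagonal block A and off-diagonal block B,

      y_local = A * x_local + B * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by
   the VecScatter.  Starting the scatter before the A*x_local product
   overlaps communication with computation.
*/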
695 
696 #undef __FUNC__
697 #define __FUNC__ "MatMultAdd_MPIAIJ"
698 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
699 {
700   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
701   int        ierr;
702 
703   PetscFunctionBegin;
704   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
705   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
706   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
707   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNC__
712 #define __FUNC__ "MatMultTrans_MPIAIJ"
713 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
716   int        ierr;
717 
718   PetscFunctionBegin;
719   /* do nondiagonal part */
720   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
721   /* send it on its way */
722   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
723   /* do local part */
724   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
725   /* receive remote parts: note this assumes the values are not actually */
726   /* inserted in yy until the next line, which is true for this implementation */
727   /* but may not always be the case. */
728   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNC__
733 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
734 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
735 {
736   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
737   int        ierr;
738 
739   PetscFunctionBegin;
740   /* do nondiagonal part */
741   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
742   /* send it on its way */
743   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
744   /* do local part */
745   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
746   /* receive remote parts: note this assumes the values are not actually */
747   /* inserted in zz until the next line, which is true for this implementation */
748   /* but may not always be the case. */
749   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 /*
754   This only works correctly for square matrices where the subblock A->A is the
755    diagonal block
756 */
757 #undef __FUNC__
758 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
759 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
760 {
761   int        ierr;
762   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
763 
764   PetscFunctionBegin;
765   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
766   if (a->rstart != a->cstart || a->rend != a->cend) {
767     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
768   }
769   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNC__
774 #define __FUNC__ "MatScale_MPIAIJ"
775 int MatScale_MPIAIJ(Scalar *aa,Mat A)
776 {
777   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
778   int        ierr;
779 
780   PetscFunctionBegin;
781   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
782   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
783   PetscFunctionReturn(0);
784 }
785 
786 #undef __FUNC__
787 #define __FUNC__ "MatDestroy_MPIAIJ"
788 int MatDestroy_MPIAIJ(Mat mat)
789 {
790   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
791   int        ierr;
792 
793   PetscFunctionBegin;
794   if (--mat->refct > 0) PetscFunctionReturn(0);
795 
796   if (mat->mapping) {
797     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
798   }
799   if (mat->bmapping) {
800     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
801   }
802   if (mat->rmap) {
803     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
804   }
805   if (mat->cmap) {
806     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
807   }
808 #if defined(USE_PETSC_LOG)
809   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
810 #endif
811   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
812   PetscFree(aij->rowners);
813   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
814   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
815 #if defined (USE_CTABLE)
816   if (aij->colmap) TableDelete(aij->colmap);
817 #else
818   if (aij->colmap) PetscFree(aij->colmap);
819 #endif
820   if (aij->garray) PetscFree(aij->garray);
821   if (aij->lvec)   VecDestroy(aij->lvec);
822   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
823   if (aij->rowvalues) PetscFree(aij->rowvalues);
824   PetscFree(aij);
825   PLogObjectDestroy(mat);
826   PetscHeaderDestroy(mat);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNC__
831 #define __FUNC__ "MatView_MPIAIJ_Binary"
832 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
833 {
834   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
835   int         ierr;
836 
837   PetscFunctionBegin;
838   if (aij->size == 1) {
839     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
840   }
841   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
842   PetscFunctionReturn(0);
843 }
844 
845 #undef __FUNC__
846 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
847 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
848 {
849   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
850   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
851   int         ierr, format,shift = C->indexshift,rank = aij->rank;
852   int         size = aij->size;
853   FILE        *fd;
854   ViewerType  vtype;
855 
856   PetscFunctionBegin;
857   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
858   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
859     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
860     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
861       MatInfo info;
862       int     flg;
863       MPI_Comm_rank(mat->comm,&rank);
864       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
865       ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
866       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
867       PetscSequentialPhaseBegin(mat->comm,1);
868       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
869          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
870       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
871          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
872       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); CHKERRQ(ierr);
873       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
874       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); CHKERRQ(ierr);
875       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
876       fflush(fd);
877       PetscSequentialPhaseEnd(mat->comm,1);
878       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
881       PetscFunctionReturn(0);
882     }
883   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
884     Draw       draw;
885     PetscTruth isnull;
886     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
887     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
888   }
889 
890   if (size == 1) {
891     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
892   } else {
893     /* assemble the entire matrix onto first processor. */
894     Mat         A;
895     Mat_SeqAIJ *Aloc;
896     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
897     Scalar      *a;
898 
899     if (!rank) {
900       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
901     } else {
902       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
903     }
904     PLogObjectParent(mat,A);
905 
906     /* copy over the A part */
907     Aloc = (Mat_SeqAIJ*) aij->A->data;
908     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
909     row = aij->rstart;
910     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
911     for ( i=0; i<m; i++ ) {
912       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
913       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
914     }
915     aj = Aloc->j;
916     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
917 
918     /* copy over the B part */
919     Aloc = (Mat_SeqAIJ*) aij->B->data;
920     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
921     row = aij->rstart;
922     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
923     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
924     for ( i=0; i<m; i++ ) {
925       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
926       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
927     }
928     PetscFree(ct);
929     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
930     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
931     /*
932        Everyone has to call to draw the matrix since the graphics waits are
933        synchronized across all processors that share the Draw object
934     */
935     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
936       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
937     }
938     ierr = MatDestroy(A); CHKERRQ(ierr);
939   }
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNC__
944 #define __FUNC__ "MatView_MPIAIJ"
945 int MatView_MPIAIJ(Mat mat,Viewer viewer)
946 {
947   int         ierr;
948   ViewerType  vtype;
949 
950   PetscFunctionBegin;
951   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
952   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
953       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
954     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
955   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
956     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
957   } else {
958     SETERRQ(PETSC_ERR_SUP,0,"Viewer type not supported by this matrix object");
959   }
960   PetscFunctionReturn(0);
961 }
962 
963 /*
964     This has to provide several versions:
965 
966      2) a) use only local smoothing, updating outer values only once.
967         b) local smoothing, updating outer values each inner iteration.
968      3) color updating of outer values between colors.
969 */
970 #undef __FUNC__
971 #define __FUNC__ "MatRelax_MPIAIJ"
972 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
973                            double fshift,int its,Vec xx)
974 {
975   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
976   Mat        AA = mat->A, BB = mat->B;
977   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
978   Scalar     *b,*x,*xs,*ls,d,*v,sum;
979   int        ierr,*idx, *diag;
980   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
981 
982   PetscFunctionBegin;
983   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
984   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
985   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
986   xs = x + shift; /* shift by one for index start of 1 */
987   ls = ls + shift;
988   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
989   diag = A->diag;
990   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
991     if (flag & SOR_ZERO_INITIAL_GUESS) {
992       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
993       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
994       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
995       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
996       PetscFunctionReturn(0);
997     }
998     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
999     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1000     while (its--) {
1001       /* go down through the rows */
1002       for ( i=0; i<m; i++ ) {
1003         n    = A->i[i+1] - A->i[i];
1004         PLogFlops(4*n+3);
1005         idx  = A->j + A->i[i] + shift;
1006         v    = A->a + A->i[i] + shift;
1007         sum  = b[i];
1008         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1009         d    = fshift + A->a[diag[i]+shift];
1010         n    = B->i[i+1] - B->i[i];
1011         idx  = B->j + B->i[i] + shift;
1012         v    = B->a + B->i[i] + shift;
1013         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1014         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1015       }
1016       /* come up through the rows */
1017       for ( i=m-1; i>-1; i-- ) {
1018         n    = A->i[i+1] - A->i[i];
1019         PLogFlops(4*n+3);
1020         idx  = A->j + A->i[i] + shift;
1021         v    = A->a + A->i[i] + shift;
1022         sum  = b[i];
1023         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1024         d    = fshift + A->a[diag[i]+shift];
1025         n    = B->i[i+1] - B->i[i];
1026         idx  = B->j + B->i[i] + shift;
1027         v    = B->a + B->i[i] + shift;
1028         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1029         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1030       }
1031     }
1032   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1033     if (flag & SOR_ZERO_INITIAL_GUESS) {
1034       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1035       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1036       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1037       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1038       PetscFunctionReturn(0);
1039     }
1040     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1042     while (its--) {
1043       for ( i=0; i<m; i++ ) {
1044         n    = A->i[i+1] - A->i[i];
1045         PLogFlops(4*n+3);
1046         idx  = A->j + A->i[i] + shift;
1047         v    = A->a + A->i[i] + shift;
1048         sum  = b[i];
1049         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1050         d    = fshift + A->a[diag[i]+shift];
1051         n    = B->i[i+1] - B->i[i];
1052         idx  = B->j + B->i[i] + shift;
1053         v    = B->a + B->i[i] + shift;
1054         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1055         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1056       }
1057     }
1058   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1059     if (flag & SOR_ZERO_INITIAL_GUESS) {
1060       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1061       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1062       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1063       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1064       PetscFunctionReturn(0);
1065     }
1066     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068     while (its--) {
1069       for ( i=m-1; i>-1; i-- ) {
1070         n    = A->i[i+1] - A->i[i];
1071         PLogFlops(4*n+3);
1072         idx  = A->j + A->i[i] + shift;
1073         v    = A->a + A->i[i] + shift;
1074         sum  = b[i];
1075         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1076         d    = fshift + A->a[diag[i]+shift];
1077         n    = B->i[i+1] - B->i[i];
1078         idx  = B->j + B->i[i] + shift;
1079         v    = B->a + B->i[i] + shift;
1080         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1081         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1082       }
1083     }
1084   } else {
1085     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1086   }
1087   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1088   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1089   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
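/*
   Sketch of the update applied in each sweep above, assuming the
   SPARSEDENSEMDOT kernel (from src/inline/spops.h) subtracts the row
   products from sum.  For local row i, with ghost values xhat = mat->lvec:

      d    = fshift + a_ii
      sum  = b_i - sum_j a_ij x_j - sum_j b_ij xhat_j    (j over stored entries)
      x_i <- (1 - omega) x_i + omega (sum + a_ii x_i) / d

   Adding a_ii x_i back cancels the diagonal term included in the sums,
   giving the classical SOR step restricted to the local rows.
*/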
1092 
1093 #undef __FUNC__
1094 #define __FUNC__ "MatGetInfo_MPIAIJ"
1095 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1096 {
1097   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1098   Mat        A = mat->A, B = mat->B;
1099   int        ierr;
1100   double     isend[5], irecv[5];
1101 
1102   PetscFunctionBegin;
1103   info->block_size     = 1.0;
1104   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1105   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1106   isend[3] = info->memory;  isend[4] = info->mallocs;
1107   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1108   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1109   isend[3] += info->memory;  isend[4] += info->mallocs;
1110   if (flag == MAT_LOCAL) {
1111     info->nz_used      = isend[0];
1112     info->nz_allocated = isend[1];
1113     info->nz_unneeded  = isend[2];
1114     info->memory       = isend[3];
1115     info->mallocs      = isend[4];
1116   } else if (flag == MAT_GLOBAL_MAX) {
1117     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1118     info->nz_used      = irecv[0];
1119     info->nz_allocated = irecv[1];
1120     info->nz_unneeded  = irecv[2];
1121     info->memory       = irecv[3];
1122     info->mallocs      = irecv[4];
1123   } else if (flag == MAT_GLOBAL_SUM) {
1124     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1125     info->nz_used      = irecv[0];
1126     info->nz_allocated = irecv[1];
1127     info->nz_unneeded  = irecv[2];
1128     info->memory       = irecv[3];
1129     info->mallocs      = irecv[4];
1130   }
1131   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1132   info->fill_ratio_needed = 0;
1133   info->factor_mallocs    = 0;
1134   info->rows_global       = (double)mat->M;
1135   info->columns_global    = (double)mat->N;
1136   info->rows_local        = (double)mat->m;
1137   info->columns_local     = (double)mat->n;
1138 
1139   PetscFunctionReturn(0);
1140 }
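/*
   Usage sketch (editorial; standard PETSc 2.x calls):

      MatInfo info;
      int     ierr;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"nz used %d, mallocs %d\n",
                  (int)info.nz_used,(int)info.mallocs);
*/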
1141 
1142 #undef __FUNC__
1143 #define __FUNC__ "MatSetOption_MPIAIJ"
1144 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1147 
1148   PetscFunctionBegin;
1149   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1150       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1151       op == MAT_COLUMNS_UNSORTED ||
1152       op == MAT_COLUMNS_SORTED ||
1153       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1154       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1155         MatSetOption(a->A,op);
1156         MatSetOption(a->B,op);
1157   } else if (op == MAT_ROW_ORIENTED) {
1158     a->roworiented = 1;
1159     MatSetOption(a->A,op);
1160     MatSetOption(a->B,op);
1161   } else if (op == MAT_ROWS_SORTED ||
1162              op == MAT_ROWS_UNSORTED ||
1163              op == MAT_SYMMETRIC ||
1164              op == MAT_STRUCTURALLY_SYMMETRIC ||
1165              op == MAT_YES_NEW_DIAGONALS)
1166     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1167   else if (op == MAT_COLUMN_ORIENTED) {
1168     a->roworiented = 0;
1169     MatSetOption(a->A,op);
1170     MatSetOption(a->B,op);
1171   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1172     a->donotstash = 1;
1173   } else if (op == MAT_NO_NEW_DIAGONALS){
1174     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1175   } else {
1176     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNC__
1182 #define __FUNC__ "MatGetSize_MPIAIJ"
1183 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1184 {
1185   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1186 
1187   PetscFunctionBegin;
1188   if (m) *m = mat->M;
1189   if (n) *n = mat->N;
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNC__
1194 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1195 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1196 {
1197   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1198 
1199   PetscFunctionBegin;
1200   if (m) *m = mat->m;
1201   if (n) *n = mat->n;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1207 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1208 {
1209   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1210 
1211   PetscFunctionBegin;
1212   *m = mat->rstart; *n = mat->rend;
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1217 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1218 
1219 #undef __FUNC__
1220 #define __FUNC__ "MatGetRow_MPIAIJ"
1221 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1222 {
1223   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1224   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1225   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1226   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1227   int        *cmap, *idx_p;
1228 
1229   PetscFunctionBegin;
1230   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1231   mat->getrowactive = PETSC_TRUE;
1232 
1233   if (!mat->rowvalues && (idx || v)) {
1234     /*
1235         allocate enough space to hold information from the longest row.
1236     */
1237     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1238     int     max = 1,m = mat->m,tmp;
1239     for ( i=0; i<m; i++ ) {
1240       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1241       if (max < tmp) { max = tmp; }
1242     }
1243     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1244     mat->rowindices = (int *) (mat->rowvalues + max);
1245   }
1246 
1247   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1248   lrow = row - rstart;
1249 
1250   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1251   if (!v)   {pvA = 0; pvB = 0;}
1252   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1253   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1254   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1255   nztot = nzA + nzB;
1256 
1257   cmap  = mat->garray;
1258   if (v  || idx) {
1259     if (nztot) {
1260       /* Sort by increasing column numbers, assuming A and B already sorted */
1261       int imark = -1;
1262       if (v) {
1263         *v = v_p = mat->rowvalues;
1264         for ( i=0; i<nzB; i++ ) {
1265           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1266           else break;
1267         }
1268         imark = i;
1269         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1270         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1271       }
1272       if (idx) {
1273         *idx = idx_p = mat->rowindices;
1274         if (imark > -1) {
1275           for ( i=0; i<imark; i++ ) {
1276             idx_p[i] = cmap[cworkB[i]];
1277           }
1278         } else {
1279           for ( i=0; i<nzB; i++ ) {
1280             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1281             else break;
1282           }
1283           imark = i;
1284         }
1285         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1286         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1287       }
1288     } else {
1289       if (idx) *idx = 0;
1290       if (v)   *v   = 0;
1291     }
1292   }
1293   *nz = nztot;
1294   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1295   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
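/*
   Usage sketch (editorial; standard PETSc 2.x calls): examine one locally
   owned row (any row in [rstart,rend) is legal); every MatGetRow() must be
   paired with MatRestoreRow().

      int    nz, *cols, rstart, rend, row, ierr;
      Scalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      row  = rstart;
      ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/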
1298 
1299 #undef __FUNC__
1300 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1301 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1302 {
1303   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1304 
1305   PetscFunctionBegin;
1306   if (aij->getrowactive == PETSC_FALSE) {
1307     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1308   }
1309   aij->getrowactive = PETSC_FALSE;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNC__
1314 #define __FUNC__ "MatNorm_MPIAIJ"
1315 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1316 {
1317   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1318   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1319   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1320   double     sum = 0.0;
1321   Scalar     *v;
1322 
1323   PetscFunctionBegin;
1324   if (aij->size == 1) {
1325     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1326   } else {
1327     if (type == NORM_FROBENIUS) {
1328       v = amat->a;
1329       for (i=0; i<amat->nz; i++ ) {
1330 #if defined(USE_PETSC_COMPLEX)
1331         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1332 #else
1333         sum += (*v)*(*v); v++;
1334 #endif
1335       }
1336       v = bmat->a;
1337       for (i=0; i<bmat->nz; i++ ) {
1338 #if defined(USE_PETSC_COMPLEX)
1339         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1340 #else
1341         sum += (*v)*(*v); v++;
1342 #endif
1343       }
1344       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1345       *norm = sqrt(*norm);
1346     } else if (type == NORM_1) { /* max column norm */
1347       double *tmp, *tmp2;
1348       int    *jj, *garray = aij->garray;
1349       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1350       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1351       PetscMemzero(tmp,aij->N*sizeof(double));
1352       *norm = 0.0;
1353       v = amat->a; jj = amat->j;
1354       for ( j=0; j<amat->nz; j++ ) {
1355         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1356       }
1357       v = bmat->a; jj = bmat->j;
1358       for ( j=0; j<bmat->nz; j++ ) {
1359         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1360       }
1361       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1362       for ( j=0; j<aij->N; j++ ) {
1363         if (tmp2[j] > *norm) *norm = tmp2[j];
1364       }
1365       PetscFree(tmp); PetscFree(tmp2);
1366     } else if (type == NORM_INFINITY) { /* max row norm */
1367       double ntemp = 0.0;
1368       for ( j=0; j<amat->m; j++ ) {
1369         v = amat->a + amat->i[j] + shift;
1370         sum = 0.0;
1371         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1372           sum += PetscAbsScalar(*v); v++;
1373         }
1374         v = bmat->a + bmat->i[j] + shift;
1375         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1376           sum += PetscAbsScalar(*v); v++;
1377         }
1378         if (sum > ntemp) ntemp = sum;
1379       }
1380       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1381     } else {
1382       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1383     }
1384   }
1385   PetscFunctionReturn(0);
1386 }
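/*
   Norms computed above (sketch), with entries spread over the A and B
   blocks on each process:

      NORM_FROBENIUS : sqrt( global sum of |a_ij|^2 )
      NORM_1         : max_j sum_i |a_ij|   (max column sum; per-column
                       partial sums of length N are Allreduce-summed)
      NORM_INFINITY  : max_i sum_j |a_ij|   (max row sum; rows are local,
                       so only a max reduction is needed)
*/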
1387 
1388 #undef __FUNC__
1389 #define __FUNC__ "MatTranspose_MPIAIJ"
1390 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1391 {
1392   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1393   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1394   int        ierr,shift = Aloc->indexshift;
1395   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1396   Mat        B;
1397   Scalar     *array;
1398 
1399   PetscFunctionBegin;
1400   if (matout == PETSC_NULL && M != N) {
1401     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1402   }
1403 
1404   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1405 
1406   /* copy over the A part */
1407   Aloc = (Mat_SeqAIJ*) a->A->data;
1408   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1409   row = a->rstart;
1410   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1411   for ( i=0; i<m; i++ ) {
1412     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1413     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1414   }
1415   aj = Aloc->j;
1416   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1417 
1418   /* copy over the B part */
1419   Aloc = (Mat_SeqAIJ*) a->B->data;
1420   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1421   row = a->rstart;
1422   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1423   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1424   for ( i=0; i<m; i++ ) {
1425     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1426     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1427   }
1428   PetscFree(ct);
1429   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1430   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1431   if (matout != PETSC_NULL) {
1432     *matout = B;
1433   } else {
1434     PetscOps       *Abops;
1435     struct _MatOps *Aops;
1436 
1437     /* This isn't really an in-place transpose; we free a's data structures and then copy B's contents into A below */
1438     PetscFree(a->rowners);
1439     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1440     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1441 #if defined (USE_CTABLE)
1442     if (a->colmap) TableDelete(a->colmap);
1443 #else
1444     if (a->colmap) PetscFree(a->colmap);
1445 #endif
1446     if (a->garray) PetscFree(a->garray);
1447     if (a->lvec) VecDestroy(a->lvec);
1448     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1449     PetscFree(a);
1450 
1451     /*
1452        This is horrible, horrible code. We need to keep the
1453       A pointers for the bops and ops but copy everything
1454       else from B.
1455     */
1456     Abops = A->bops;
1457     Aops  = A->ops;
1458     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1459     A->bops = Abops;
1460     A->ops  = Aops;
1461     PetscHeaderDestroy(B);
1462   }
1463   PetscFunctionReturn(0);
1464 }
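
/*
   Usage sketch (a hedged note, assuming the generic MatTranspose() entry
   point dispatches here): a non-null matout returns an explicit transpose in
   a new matrix, while matout == PETSC_NULL requests the pseudo in-place
   variant above, which is supported only for square matrices.
*/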
1465 
1466 #undef __FUNC__
1467 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1468 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1469 {
1470   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1471   Mat a = aij->A, b = aij->B;
1472   int ierr,s1,s2,s3;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1476   if (rr) {
1477     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1478     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1479     /* Overlap communication with computation. */
1480     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1481   }
1482   if (ll) {
1483     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1484     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1485     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1486   }
1487   /* scale the diagonal block */
1488   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1489 
1490   if (rr) {
1491     /* Do a scatter end and then right scale the off-diagonal block */
1492     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1493     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1494   }
1495 
1496   PetscFunctionReturn(0);
1497 }
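
/*
   Note the communication/computation overlap above: the scatter of rr into
   aij->lvec is begun before the diagonal block is scaled, and is completed
   only when the off-diagonal block actually needs the remote values.
*/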
1498 
1499 
1500 extern int MatPrintHelp_SeqAIJ(Mat);
1501 #undef __FUNC__
1502 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1503 int MatPrintHelp_MPIAIJ(Mat A)
1504 {
1505   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1506   int        ierr;
1507 
1508   PetscFunctionBegin;
1509   if (!a->rank) {
1510     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNC__
1516 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1517 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1518 {
1519   PetscFunctionBegin;
1520   *bs = 1;
1521   PetscFunctionReturn(0);
1522 }
1523 #undef __FUNC__
1524 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1525 int MatSetUnfactored_MPIAIJ(Mat A)
1526 {
1527   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1528   int        ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNC__
1536 #define __FUNC__ "MatEqual_MPIAIJ"
1537 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1538 {
1539   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1540   Mat        a, b, c, d;
1541   PetscTruth flg;
1542   int        ierr;
1543 
1544   PetscFunctionBegin;
1545   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1546   a = matA->A; b = matA->B;
1547   c = matB->A; d = matB->B;
1548 
1549   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1550   if (flg == PETSC_TRUE) {
1551     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1552   }
1553   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1554   PetscFunctionReturn(0);
1555 }
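
/*
   The local comparisons above are combined with a logical-AND reduction, so
   the matrices are reported equal only if both the diagonal and off-diagonal
   blocks match on every process.
*/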
1556 
1557 #undef __FUNC__
1558 #define __FUNC__ "MatCopy_MPIAIJ"
1559 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1560 {
1561   int        ierr;
1562   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1563   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1564 
1565   PetscFunctionBegin;
1566   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1567     /* Because of the column compression in the off-processor part of the matrix a->B,
1568        the number of columns in a->B and b->B may be different; hence we cannot call
1569        MatCopy() directly on the two parts. If need be, we could provide a copy more
1570        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1571        then copying the submatrices */
1572     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1573   } else {
1574     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1575     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1581 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1582 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1583 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1584 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1585 
1586 /* -------------------------------------------------------------------*/
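/*
   Function table for the MATMPIAIJ implementation. The entries are positional;
   a 0 entry presumably means the operation is not implemented for this matrix
   type, so the generic dispatch layer will raise an error for it.
*/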
1587 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1588        MatGetRow_MPIAIJ,
1589        MatRestoreRow_MPIAIJ,
1590        MatMult_MPIAIJ,
1591        MatMultAdd_MPIAIJ,
1592        MatMultTrans_MPIAIJ,
1593        MatMultTransAdd_MPIAIJ,
1594        0,
1595        0,
1596        0,
1597        0,
1598        0,
1599        0,
1600        MatRelax_MPIAIJ,
1601        MatTranspose_MPIAIJ,
1602        MatGetInfo_MPIAIJ,
1603        MatEqual_MPIAIJ,
1604        MatGetDiagonal_MPIAIJ,
1605        MatDiagonalScale_MPIAIJ,
1606        MatNorm_MPIAIJ,
1607        MatAssemblyBegin_MPIAIJ,
1608        MatAssemblyEnd_MPIAIJ,
1609        0,
1610        MatSetOption_MPIAIJ,
1611        MatZeroEntries_MPIAIJ,
1612        MatZeroRows_MPIAIJ,
1613        0,
1614        0,
1615        0,
1616        0,
1617        MatGetSize_MPIAIJ,
1618        MatGetLocalSize_MPIAIJ,
1619        MatGetOwnershipRange_MPIAIJ,
1620        0,
1621        0,
1622        0,
1623        0,
1624        MatDuplicate_MPIAIJ,
1625        0,
1626        0,
1627        0,
1628        0,
1629        0,
1630        MatGetSubMatrices_MPIAIJ,
1631        MatIncreaseOverlap_MPIAIJ,
1632        MatGetValues_MPIAIJ,
1633        MatCopy_MPIAIJ,
1634        MatPrintHelp_MPIAIJ,
1635        MatScale_MPIAIJ,
1636        0,
1637        0,
1638        0,
1639        MatGetBlockSize_MPIAIJ,
1640        0,
1641        0,
1642        0,
1643        0,
1644        MatFDColoringCreate_MPIAIJ,
1645        0,
1646        MatSetUnfactored_MPIAIJ,
1647        0,
1648        0,
1649        MatGetSubMatrix_MPIAIJ,
1650        0,
1651        0,
1652        MatGetMaps_Petsc};
1653 
1654 /* ----------------------------------------------------------------------------------------*/
1655 
1656 EXTERN_C_BEGIN
1657 #undef __FUNC__
1658 #define __FUNC__ "MatStoreValues_MPIAIJ"
1659 int MatStoreValues_MPIAIJ(Mat mat)
1660 {
1661   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1662   int        ierr;
1663 
1664   PetscFunctionBegin;
1665   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1666   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 EXTERN_C_END
1670 
1671 EXTERN_C_BEGIN
1672 #undef __FUNC__
1673 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1674 int MatRetrieveValues_MPIAIJ(Mat mat)
1675 {
1676   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1677   int        ierr;
1678 
1679   PetscFunctionBegin;
1680   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1681   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 EXTERN_C_END
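
/*
   MatStoreValues()/MatRetrieveValues() above simply forward to the two
   sequential blocks. A minimal usage sketch (assuming the nonzero pattern
   stays fixed between the two calls):

     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        ... overwrite the numerical values ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/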
1685 
1686 #include "pc.h"
1687 EXTERN_C_BEGIN
1688 extern int PCSetUp_BJacobi_AIJ(PC);
1689 EXTERN_C_END
1690 
1691 #undef __FUNC__
1692 #define __FUNC__ "MatCreateMPIAIJ"
1693 /*@C
1694    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1695    (the default parallel PETSc format).  For good matrix assembly performance
1696    the user should preallocate the matrix storage by setting the parameters
1697    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1698    performance can be increased by more than a factor of 50.
1699 
1700    Collective on MPI_Comm
1701 
1702    Input Parameters:
1703 +  comm - MPI communicator
1704 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1705            This value should be the same as the local size used in creating the
1706            y vector for the matrix-vector product y = Ax.
1707 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1708        This value should be the same as the local size used in creating the
1709        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1710 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1711 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1712 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1713            (same value is used for all local rows)
1714 .  d_nnz - array containing the number of nonzeros in the various rows of the
1715            DIAGONAL portion of the local submatrix (possibly different for each row)
1716            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1717            The size of this array is equal to the number of local rows, i.e., 'm'.
1718            You must leave room for the diagonal entry even if it is zero.
1719 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1720            submatrix (same value is used for all local rows).
1721 -  o_nnz - array containing the number of nonzeros in the various rows of the
1722            OFF-DIAGONAL portion of the local submatrix (possibly different for
1723            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1724            structure. The size of this array is equal to the number
1725            of local rows, i.e., 'm'.
1726 
1727    Output Parameter:
1728 .  A - the matrix
1729 
1730    Notes:
1731    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1732    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1733    storage requirements for this matrix.
1734 
1735    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1736    processor then it must be used on all processors that share the object for
1737    that argument.
1738 
1739    The AIJ format (also called the Yale sparse matrix format or
1740    compressed row storage), is fully compatible with standard Fortran 77
1741    storage.  That is, the stored row and column indices can begin at
1742    either one (as in Fortran) or zero.  See the users manual for details.
1743 
1744    The user MUST specify either the local or global matrix dimensions
1745    (possibly both).
1746 
1747    The parallel matrix is partitioned such that the first m0 rows belong to
1748    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1749    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
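
   If m is PETSC_DECIDE, a sketch of the default split used by this routine
   (size and rank denote the communicator size and this process's rank) is:

.vb
     m = M/size + ((M % size) > rank);
.ve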
1750 
1751    The DIAGONAL portion of the local submatrix of a processor can be defined
1752    as the submatrix obtained by extracting the part corresponding
1753    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1754    first row that belongs to the processor, and r2 is the last row belonging
1755    to this processor. This is a square mxm matrix. The remaining portion
1756    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1757 
1758    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1759 
1760    By default, this format uses inodes (identical nodes) when possible.
1761    We search for consecutive rows with the same nonzero structure, thereby
1762    reusing matrix information to achieve increased efficiency.
1763 
1764    Options Database Keys:
1765 +  -mat_aij_no_inode  - Do not use inodes
1766 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1767 .  -mat_aij_oneindex - Internally use indexing starting at 1
1768         rather than 0.  Note that when calling MatSetValues(),
1769         the user still MUST index entries starting at 0!
1770 -  -mat_mpi - use the parallel matrix data structures even on one processor
1771               (defaults to using SeqAIJ format on one processor)
1774 
1775 
1776    Example usage:
1777 
1778    Consider the following 8x8 matrix with 34 non-zero values, that is
1779    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1780    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1781    as follows:
1782 
1783 .vb
1784             1  2  0  |  0  3  0  |  0  4
1785     Proc0   0  5  6  |  7  0  0  |  8  0
1786             9  0 10  | 11  0  0  | 12  0
1787     -------------------------------------
1788            13  0 14  | 15 16 17  |  0  0
1789     Proc1   0 18  0  | 19 20 21  |  0  0
1790             0  0  0  | 22 23  0  | 24  0
1791     -------------------------------------
1792     Proc2  25 26 27  |  0  0 28  | 29  0
1793            30  0  0  | 31 32 33  |  0 34
1794 .ve
1795 
1796    This can be represented as a collection of submatrices as:
1797 
1798 .vb
1799       A B C
1800       D E F
1801       G H I
1802 .ve
1803 
1804    Where the submatrices A,B,C are owned by proc0, D,E,F are
1805    owned by proc1, G,H,I are owned by proc2.
1806 
1807    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1808    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1809    The 'M','N' parameters are 8,8, and have the same values on all procs.
1810 
1811    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1812    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1813    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1814    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1815    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1816    matrix, and [DF] as another SeqAIJ matrix.
1817 
1818    When d_nz, o_nz parameters are specified, d_nz storage elements are
1819    allocated for every row of the local diagonal submatrix, and o_nz
1820    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1821    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1822    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1823    In this case, the values of d_nz,o_nz are:
1824 .vb
1825      proc0 : d_nz = 2, o_nz = 2
1826      proc1 : d_nz = 3, o_nz = 2
1827      proc2 : d_nz = 1, o_nz = 4
1828 .ve
1829    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1830    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1831    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1832    34 values.
1833 
1834    When d_nnz, o_nnz parameters are specified, the storage is specified
1835    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
1836    In the above case the values for d_nnz,o_nnz are:
1837 .vb
1838      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1839      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1840      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1841 .ve
1842    Here the space allocated is the sum of all the above values, i.e., 34, and
1843    hence the preallocation is perfect.
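
   A minimal call sketch for proc0 in the example above (the Mat A and the
   int ierr are assumed to be declared by the caller):

.vb
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve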
1844 
1845 .keywords: matrix, aij, compressed row, sparse, parallel
1846 
1847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1848 @*/
1849 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1850 {
1851   Mat          B;
1852   Mat_MPIAIJ   *b;
1853   int          ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0;
1854 
1855   PetscFunctionBegin;
1856   MPI_Comm_size(comm,&size);
1857   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1858   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1859   if (!flag1 && !flag2 && size == 1) {
1860     if (M == PETSC_DECIDE) M = m;
1861     if (N == PETSC_DECIDE) N = n;
1862     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1863     PetscFunctionReturn(0);
1864   }
1865 
1866   *A = 0;
1867   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1868   PLogObjectCreate(B);
1869   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1870   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1871   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1872   B->ops->destroy    = MatDestroy_MPIAIJ;
1873   B->ops->view       = MatView_MPIAIJ;
1874   B->factor          = 0;
1875   B->assembled       = PETSC_FALSE;
1876   B->mapping         = 0;
1877 
1878   B->insertmode      = NOT_SET_VALUES;
1879   b->size            = size;
1880   MPI_Comm_rank(comm,&b->rank);
1881 
1882   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1883     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1884   }
1885 
1886   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1887     work[0] = m; work[1] = n;
1888     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1889     if (M == PETSC_DECIDE) M = sum[0];
1890     if (N == PETSC_DECIDE) N = sum[1];
1891   }
1892   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1893   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1894   b->m = m; B->m = m;
1895   b->n = n; B->n = n;
1896   b->N = N; B->N = N;
1897   b->M = M; B->M = M;
1898 
1899   /* the information in the maps duplicates the information computed below, eventually
1900      we should remove the duplicate information that is not contained in the maps */
1901   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1902   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1903 
1904   /* build local table of row and column ownerships */
1905   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1906   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1907   b->cowners = b->rowners + b->size + 2;
1908   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1909   b->rowners[0] = 0;
1910   for ( i=2; i<=b->size; i++ ) {
1911     b->rowners[i] += b->rowners[i-1];
1912   }
1913   b->rstart = b->rowners[b->rank];
1914   b->rend   = b->rowners[b->rank+1];
1915   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1916   b->cowners[0] = 0;
1917   for ( i=2; i<=b->size; i++ ) {
1918     b->cowners[i] += b->cowners[i-1];
1919   }
1920   b->cstart = b->cowners[b->rank];
1921   b->cend   = b->cowners[b->rank+1];
1922 
1923   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1924   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1925   PLogObjectParent(B,b->A);
1926   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1927   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1928   PLogObjectParent(B,b->B);
1929 
1930   /* build cache for off-processor entries generated during assembly */
1931   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1932   b->donotstash  = 0;
1933   b->colmap      = 0;
1934   b->garray      = 0;
1935   b->roworiented = 1;
1936 
1937   /* stuff used for matrix vector multiply */
1938   b->lvec      = 0;
1939   b->Mvctx     = 0;
1940 
1941   /* stuff for MatGetRow() */
1942   b->rowindices   = 0;
1943   b->rowvalues    = 0;
1944   b->getrowactive = PETSC_FALSE;
1945 
1946   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1947                                      "MatStoreValues_MPIAIJ",
1948                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1949   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1950                                      "MatRetrieveValues_MPIAIJ",
1951                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1952   ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C",
1953                                      "PCSetUp_BJacobi_AIJ",
1954                                      (void*)PCSetUp_BJacobi_AIJ);CHKERRQ(ierr);
1955   *A = B;
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNC__
1960 #define __FUNC__ "MatDuplicate_MPIAIJ"
1961 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1962 {
1963   Mat        mat;
1964   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1965   int        ierr, len=0, flg;
1966 
1967   PetscFunctionBegin;
1968   *newmat       = 0;
1969   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1970   PLogObjectCreate(mat);
1971   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1972   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1973   mat->ops->destroy    = MatDestroy_MPIAIJ;
1974   mat->ops->view       = MatView_MPIAIJ;
1975   mat->factor     = matin->factor;
1976   mat->assembled  = PETSC_TRUE;
1977 
1978   a->m = mat->m   = oldmat->m;
1979   a->n = mat->n   = oldmat->n;
1980   a->M = mat->M   = oldmat->M;
1981   a->N = mat->N   = oldmat->N;
1982 
1983   a->rstart       = oldmat->rstart;
1984   a->rend         = oldmat->rend;
1985   a->cstart       = oldmat->cstart;
1986   a->cend         = oldmat->cend;
1987   a->size         = oldmat->size;
1988   a->rank         = oldmat->rank;
1989   mat->insertmode = NOT_SET_VALUES;
1990   a->rowvalues    = 0;
1991   a->getrowactive = PETSC_FALSE;
1992 
1993   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1994   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1995   a->cowners = a->rowners + a->size + 2;
1996   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1997   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1998   if (oldmat->colmap) {
1999 #if defined (USE_CTABLE)
2000     ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr);
2001 #else
2002     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
2003     PLogObjectMemory(mat,(a->N)*sizeof(int));
2004     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
2005 #endif
2006   } else a->colmap = 0;
2007   if (oldmat->garray) {
2008     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
2009     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
2010     PLogObjectMemory(mat,len*sizeof(int));
2011     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2012   } else a->garray = 0;
2013 
2014   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2015   PLogObjectParent(mat,a->lvec);
2016   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2017   PLogObjectParent(mat,a->Mvctx);
2018   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2019   PLogObjectParent(mat,a->A);
2020   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2021   PLogObjectParent(mat,a->B);
2022   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2023   if (flg) {
2024     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2025   }
2026   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2027   *newmat = mat;
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #include "sys.h"
2032 
2033 #undef __FUNC__
2034 #define __FUNC__ "MatLoad_MPIAIJ"
2035 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
2036 {
2037   Mat          A;
2038   Scalar       *vals,*svals;
2039   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2040   MPI_Status   status;
2041   int          i, nz, ierr, j,rstart, rend, fd;
2042   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
2043   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
2044   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
2045 
2046   PetscFunctionBegin;
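  /*
     Expected binary layout, as inferred from the reads below: an integer
     header {MAT_COOKIE, M, N, header[3]}, followed by the M row lengths, then
     all column indices, then all numerical values; header[3] < 0 flags a
     special on-disk format that this loader does not handle.
  */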
2047   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2048   if (!rank) {
2049     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2050     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2051     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2052     if (header[3] < 0) {
2053       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
2054     }
2055   }
2056 
2057   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2058   M = header[1]; N = header[2];
2059   /* determine ownership of all rows */
2060   m = M/size + ((M % size) > rank);
2061   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
2062   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2063   rowners[0] = 0;
2064   for ( i=2; i<=size; i++ ) {
2065     rowners[i] += rowners[i-1];
2066   }
2067   rstart = rowners[rank];
2068   rend   = rowners[rank+1];
2069 
2070   /* distribute row lengths to all processors */
2071   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
2072   offlens = ourlens + (rend-rstart);
2073   if (!rank) {
2074     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
2075     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2076     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2077     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2078     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2079     PetscFree(sndcounts);
2080   } else {
2081     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2082   }
2083 
2084   if (!rank) {
2085     /* calculate the number of nonzeros on each processor */
2086     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2087     PetscMemzero(procsnz,size*sizeof(int));
2088     for ( i=0; i<size; i++ ) {
2089       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2090         procsnz[i] += rowlengths[j];
2091       }
2092     }
2093     PetscFree(rowlengths);
2094 
2095     /* determine max buffer needed and allocate it */
2096     maxnz = 0;
2097     for ( i=0; i<size; i++ ) {
2098       maxnz = PetscMax(maxnz,procsnz[i]);
2099     }
2100     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2101 
2102     /* read in my part of the matrix column indices  */
2103     nz     = procsnz[0];
2104     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2105     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2106 
2107     /* read in everyone else's part and ship it off */
2108     for ( i=1; i<size; i++ ) {
2109       nz   = procsnz[i];
2110       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2111       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2112     }
2113     PetscFree(cols);
2114   } else {
2115     /* determine buffer space needed for message */
2116     nz = 0;
2117     for ( i=0; i<m; i++ ) {
2118       nz += ourlens[i];
2119     }
2120     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2121 
2122     /* receive message of column indices */
2123     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2124     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2125     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2126   }
2127 
2128   /* determine column ownership if matrix is not square */
2129   if (N != M) {
2130     n      = N/size + ((N % size) > rank);
2131     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2132     cstart = cend - n;
2133   } else {
2134     cstart = rstart;
2135     cend   = rend;
2136     n      = cend - cstart;
2137   }
2138 
2139   /* loop over local rows, determining number of off diagonal entries */
2140   PetscMemzero(offlens,m*sizeof(int));
2141   jj = 0;
2142   for ( i=0; i<m; i++ ) {
2143     for ( j=0; j<ourlens[i]; j++ ) {
2144       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2145       jj++;
2146     }
2147   }
2148 
2149   /* create our matrix */
2150   for ( i=0; i<m; i++ ) {
2151     ourlens[i] -= offlens[i];
2152   }
2153   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2154   A = *newmat;
2155   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
2156   for ( i=0; i<m; i++ ) {
2157     ourlens[i] += offlens[i];
2158   }
2159 
2160   if (!rank) {
2161     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2162 
2163     /* read in my part of the matrix numerical values  */
2164     nz   = procsnz[0];
2165     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2166 
2167     /* insert into matrix */
2168     jj      = rstart;
2169     smycols = mycols;
2170     svals   = vals;
2171     for ( i=0; i<m; i++ ) {
2172       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2173       smycols += ourlens[i];
2174       svals   += ourlens[i];
2175       jj++;
2176     }
2177 
2178     /* read in other processors and ship out */
2179     for ( i=1; i<size; i++ ) {
2180       nz   = procsnz[i];
2181       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2182       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2183     }
2184     PetscFree(procsnz);
2185   } else {
2186     /* receive numeric values */
2187     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2188 
2189     /* receive message of values */
2190     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2191     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2192     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2193 
2194     /* insert into matrix */
2195     jj      = rstart;
2196     smycols = mycols;
2197     svals   = vals;
2198     for ( i=0; i<m; i++ ) {
2199       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2200       smycols += ourlens[i];
2201       svals   += ourlens[i];
2202       jj++;
2203     }
2204   }
2205   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2206 
2207   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2208   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNC__
2213 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2214 /*
2215     Not great since it makes two copies of the submatrix: first a SeqAIJ
2216   locally, and then the end result by concatenating the local matrices.
2217   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2218 */
2219 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2220 {
2221   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2222   Mat        *local,M, Mreuse;
2223   Scalar     *vwork,*aa;
2224   MPI_Comm   comm = mat->comm;
2225   Mat_SeqAIJ *aij;
2226   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2227 
2228   PetscFunctionBegin;
2229   MPI_Comm_rank(comm,&rank);
2230   MPI_Comm_size(comm,&size);
2231 
2232   if (call ==  MAT_REUSE_MATRIX) {
2233     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2234     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2235     local = &Mreuse;
2236     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2237   } else {
2238     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2239     Mreuse = *local;
2240     PetscFree(local);
2241   }
2242 
2243   /*
2244       m - number of local rows
2245       n - number of columns (same on all processors)
2246       rstart - first row in new global matrix generated
2247   */
2248   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2249   if (call == MAT_INITIAL_MATRIX) {
2250     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2251     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2252     ii  = aij->i;
2253     jj  = aij->j;
2254 
2255     /*
2256         Determine the number of non-zeros in the diagonal and off-diagonal
2257         portions of the matrix in order to do correct preallocation
2258     */
2259 
2260     /* first get start and end of "diagonal" columns */
2261     if (csize == PETSC_DECIDE) {
2262       nlocal = n/size + ((n % size) > rank);
2263     } else {
2264       nlocal = csize;
2265     }
2266     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2267     rstart = rend - nlocal;
2268     if (rank == size - 1 && rend != n) {
2269       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2270     }
2271 
2272     /* next, compute all the lengths */
2273     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2274     olens = dlens + m;
2275     for ( i=0; i<m; i++ ) {
2276       jend = ii[i+1] - ii[i];
2277       olen = 0;
2278       dlen = 0;
2279       for ( j=0; j<jend; j++ ) {
2280         if ( *jj < rstart || *jj >= rend) olen++;
2281         else dlen++;
2282         jj++;
2283       }
2284       olens[i] = olen;
2285       dlens[i] = dlen;
2286     }
2287     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2288     PetscFree(dlens);
2289   } else {
2290     int ml,nl;
2291 
2292     M = *newmat;
2293     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2294     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2295     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2296     /*
2297          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2298        rather than the slower MatSetValues().
2299     */
2300     M->was_assembled = PETSC_TRUE;
2301     M->assembled     = PETSC_FALSE;
2302   }
2303   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2304   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2305   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2306   ii  = aij->i;
2307   jj  = aij->j;
2308   aa  = aij->a;
2309   for (i=0; i<m; i++) {
2310     row   = rstart + i;
2311     nz    = ii[i+1] - ii[i];
2312     cwork = jj;     jj += nz;
2313     vwork = aa;     aa += nz;
2314     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2315   }
2316 
2317   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2318   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2319   *newmat = M;
2320 
2321   /* save submatrix used in processor for next request */
2322   if (call ==  MAT_INITIAL_MATRIX) {
2323     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2324     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2325   }
2326 
2327   PetscFunctionReturn(0);
2328 }
2329