#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.231 1998/01/28 21:01:55 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of global columns.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}
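/*
   Example (a sketch, not from the source): if the off-diagonal block B has
   three local columns whose global indices are garray[] = {3,7,9}, the loop
   above sets colmap[3] = 1, colmap[7] = 2, colmap[9] = 3 and leaves every
   other entry zero.  Thus colmap[g]-1 recovers the local column of global
   column g, and colmap[g] == 0 flags a global column not present in B.
*/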

extern int DisAssemble_MPIAIJ(Mat);

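/*
   The two macros below insert a single value into the diagonal (A) or
   off-diagonal (B) SeqAIJ block: the column is located within the row by a
   binary search followed by a linear scan; an existing entry is overwritten
   or added to, and otherwise the value is inserted, reallocating the whole
   block with CHUNKSIZE extra entries when the row has no free slot left.
*/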
#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                         len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                         len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                         len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                         len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
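  /* The bitwise-OR reduction combines the insert modes of all processors;
     assuming INSERT_VALUES and ADD_VALUES occupy distinct nonzero bits (as
     they do in this version of PETSc), a mixture across processors ORs to
     (ADD_VALUES|INSERT_VALUES), which is caught below. */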
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row index, global column index, value); the indices are stored
     as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
     allocate the largest needed buffer for each receive.  Potentially
     this is a lot of wasted space.

       This could be done better.
  */
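  /*
     Sketch of the packing (an illustration, not from the source): a message
     carrying the stashed entries (0,5) = 2.0 and (0,8) = 3.0 is sent as the
     six Scalars

         { 0.0, 5.0, 2.0,  0.0, 8.0, 3.0 }

     hence the factors of 3 in the buffer sizes and MPI counts below.
  */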
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
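  /*
     Example (hypothetical counts): with size = 3 and nprocs = {2,0,4} the
     scan below yields starts = {0,2,2}, so the entries destined for
     processor 0 occupy svalues[0..1] and those for processor 2 begin at
     svalues[2].  The packing loop increments starts[] in place, which is
     why it is rebuilt before the sends are posted.
  */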
  svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
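  /* was_assembled is 0 or 1 on each processor, so the MPI_PROD reduction
     below is 1 only if every processor is still in the assembled state;
     a single disassembled processor forces the product to 0, and the
     remaining processors then disassemble as well. */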
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
    }
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
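  /* Computes yy = a->A*xx + a->B*lvec: the scatter that gathers the ghost
     values of xx into a->lvec is started first and overlapped with the
     multiply by the diagonal block a->A, then completed before the
     off-diagonal block a->B adds its contribution. */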
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is
   the diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*) aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto the first processor */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions:
     1) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     2) coloring, updating outer values between colors
*/
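/*
   Sketch of the update the sweeps below implement (assuming the
   SPARSEDENSEMDOT macro subtracts the sparse row dot product from sum):

       x_i <- (1 - omega) x_i
              + omega * (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift)

   where off-processor columns contribute through the ghost values
   gathered into mat->lvec by the scatter that precedes each sweep.
*/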
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *) BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->n;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
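      /* Illustration (hypothetical data): with cstart = 10, cend = 15,
         nzA = 2 local columns {0,3} (global {10,13}) and nzB = 3 ghost
         columns whose cmap[] values are {2,5,17}, the scan below stops at
         imark = 2 and the merged row is {2,5,10,13,17}: B's sub-diagonal
         columns, then A's columns shifted by cstart, then B's remaining
         (super-diagonal) columns. */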
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
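      /* ||A||_1 = max_j sum_i |a_ij|: accumulate the absolute values of
         the local entries into full-length column sums, add the sums
         across all processors, then take the maximum over the columns */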
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
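      /* ||A||_inf = max_i sum_j |a_ij|: row sums are purely local, so the
         only communication needed is the final maximum over processors */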
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but we free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

1443 #undef __FUNC__
1444 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1445 int MatSetUnfactored_MPIAIJ(Mat A)
1446 {
1447   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1448   int        ierr;
1449 
1450   PetscFunctionBegin;
1451   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNC__
1456 #define __FUNC__ "MatEqual_MPIAIJ"
1457 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1458 {
1459   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1460   Mat        a, b, c, d;
1461   PetscTruth flg;
1462   int        ierr;
1463 
1464   PetscFunctionBegin;
1465   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1466   a = matA->A; b = matA->B;
1467   c = matB->A; d = matB->B;
1468 
1469   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1470   if (flg == PETSC_TRUE) {
1471     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1472   }
1473   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
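
/*
   Editor's note: an illustrative sketch (not from the original source). The
   MPI_Allreduce above makes the comparison collective, so the result is
   PETSC_TRUE only if the diagonal and off-diagonal blocks match on every
   process.
*/
#if 0
  Mat        A,B;
  PetscTruth eq;
  int        ierr;

  ierr = MatEqual(A,B,&eq); CHKERRQ(ierr);
  if (eq == PETSC_TRUE) { /* A and B agree in structure and values globally */ }
#endif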
1476 
1477 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1478 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1479 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1480 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1481 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1482 
1483 /* -------------------------------------------------------------------*/
1484 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1485        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1486        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1487        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1488        0,0,
1489        0,0,
1490        0,0,
1491        MatRelax_MPIAIJ,
1492        MatTranspose_MPIAIJ,
1493        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1494        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1495        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1496        0,
1497        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1498        0,0,0,0,
1499        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1500        0,0,
1501        0,0,MatConvertSameType_MPIAIJ,0,0,
1502        0,0,0,
1503        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1504        MatPrintHelp_MPIAIJ,
1505        MatScale_MPIAIJ,0,0,0,
1506        MatGetBlockSize_MPIAIJ,0,0,0,0,
1507        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1508        0,0,MatGetSubMatrix_MPIAIJ};
1509 
1510 
1511 #undef __FUNC__
1512 #define __FUNC__ "MatCreateMPIAIJ"
1513 /*@C
1514    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1515    (the default parallel PETSc format).  For good matrix assembly performance
1516    the user should preallocate the matrix storage by setting the parameters
1517    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1518    performance can be increased by more than a factor of 50.
1519 
1520    Input Parameters:
1521 .  comm - MPI communicator
1522 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1523            This value should be the same as the local size used in creating the
1524            y vector for the matrix-vector product y = Ax.
1525 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1526        This value should be the same as the local size used in creating the
1527        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1528 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1529 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1530 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1531            (same for all local rows)
1532 .  d_nzz - array containing the number of nonzeros in the various rows of the
1533            diagonal portion of the local submatrix (possibly different for each row)
1534            or PETSC_NULL. You must leave room for the diagonal entry even if
1535            it is zero.
1536 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1537            submatrix (same for all local rows).
1538 .  o_nzz - array containing the number of nonzeros in the various rows of the
1539            off-diagonal portion of the local submatrix (possibly different for
1540            each row) or PETSC_NULL.
1541 
1542    Output Parameter:
1543 .  A - the matrix
1544 
1545    Notes:
1546    The AIJ format (also called the Yale sparse matrix format or
1547    compressed row storage) is fully compatible with standard Fortran 77
1548    storage.  That is, the stored row and column indices can begin at
1549    either one (as in Fortran) or zero.  See the users manual for details.
1550 
1551    The user MUST specify either the local or global matrix dimensions
1552    (possibly both).
1553 
1554    By default, this format uses inodes (identical nodes) when possible.
1555    We search for consecutive rows with the same nonzero structure, thereby
1556    reusing matrix information to achieve increased efficiency.
1557 
1558    Options Database Keys:
1559 $    -mat_aij_no_inode  - Do not use inodes
1560 $    -mat_aij_inode_limit <limit> - Set inode limit.
1561 $        (max limit=5)
1562 $    -mat_aij_oneindex - Internally use indexing starting at 1
1563 $        rather than 0.  Note: When calling MatSetValues(),
1564 $        the user still MUST index entries starting at 0!
1565 
1566    Storage Information:
1567    For a square global matrix we define each processor's diagonal portion
1568    to be its local rows and the corresponding columns (a square submatrix);
1569    each processor's off-diagonal portion encompasses the remainder of the
1570    local matrix (a rectangular submatrix).
1571 
1572    The user can specify preallocated storage for the diagonal part of
1573    the local submatrix with either d_nz or d_nnz (not both).  Set
1574    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1575    memory allocation.  Likewise, specify preallocated storage for the
1576    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1577 
1578    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1579    the figure below we depict these three local rows and all columns (0-11).
1580 
1581 $          0 1 2 3 4 5 6 7 8 9 10 11
1582 $         --------------------------
1583 $  row 3  |  o o o d d d o o o o o o
1584 $  row 4  |  o o o d d d o o o o o o
1585 $  row 5  |  o o o d d d o o o o o o
1586 $         --------------------------
1587 $
1588 
1589    Thus, any entries in the d locations are stored in the d (diagonal)
1590    submatrix, and any entries in the o locations are stored in the
1591    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1592    stored simply in the MATSEQAIJ format for compressed row storage.
1593 
1594    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1595    and o_nz should indicate the number of nonzeros per row in the o matrix.
1596    In general, for PDE problems in which most nonzeros are near the diagonal,
1597    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1598    or you will get TERRIBLE performance; see the users' manual chapter on
1599    matrices.
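
   Example (editor's sketch, using the signature documented here): each of the
   three rows depicted above has 3 nonzeros in the d block and 9 in the o
   block, so with 12 global columns one could preallocate via
$      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,12,
$                             3,PETSC_NULL,9,PETSC_NULL,&A);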
1600 
1601 .keywords: matrix, aij, compressed row, sparse, parallel
1602 
1603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1604 @*/
1605 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1606                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1607 {
1608   Mat          B;
1609   Mat_MPIAIJ   *b;
1610   int          ierr, i,sum[2],work[2],size;
1611 
1612   PetscFunctionBegin;
1613   MPI_Comm_size(comm,&size);
1614   if (size == 1) {
1615     if (M == PETSC_DECIDE) M = m;
1616     if (N == PETSC_DECIDE) N = n;
1617     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1618     PetscFunctionReturn(0);
1619   }
1620 
1621   *A = 0;
1622   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1623   PLogObjectCreate(B);
1624   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1625   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1626   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1627   B->destroy    = MatDestroy_MPIAIJ;
1628   B->view       = MatView_MPIAIJ;
1629   B->factor     = 0;
1630   B->assembled  = PETSC_FALSE;
1631   B->mapping    = 0;
1632 
1633   B->insertmode = NOT_SET_VALUES;
1634   b->size       = size;
1635   MPI_Comm_rank(comm,&b->rank);
1636 
1637   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1638     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1639   }
1640 
1641   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1642     work[0] = m; work[1] = n;
1643     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1644     if (M == PETSC_DECIDE) M = sum[0];
1645     if (N == PETSC_DECIDE) N = sum[1];
1646   }
1647   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1648   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1649   b->m = m; B->m = m;
1650   b->n = n; B->n = n;
1651   b->N = N; B->N = N;
1652   b->M = M; B->M = M;
1653 
1654   /* build local table of row and column ownerships */
1655   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1656   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1657   b->cowners = b->rowners + b->size + 2;
1658   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1659   b->rowners[0] = 0;
1660   for ( i=2; i<=b->size; i++ ) {
1661     b->rowners[i] += b->rowners[i-1];
1662   }
1663   b->rstart = b->rowners[b->rank];
1664   b->rend   = b->rowners[b->rank+1];
1665   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1666   b->cowners[0] = 0;
1667   for ( i=2; i<=b->size; i++ ) {
1668     b->cowners[i] += b->cowners[i-1];
1669   }
1670   b->cstart = b->cowners[b->rank];
1671   b->cend   = b->cowners[b->rank+1];
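
  /*
     Editor's note (illustrative, not in the original source): with size=3 and
     local row counts m = 4,3,3, the Allgather and partial sums above yield
     rowners = {0,4,7,10}, so rank 1 owns global rows [4,7); cowners is the
     analogous table for the columns.
  */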
1672 
1673   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1674   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1675   PLogObjectParent(B,b->A);
1676   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1677   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1678   PLogObjectParent(B,b->B);
1679 
1680   /* build cache for off array entries formed */
1681   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1682   b->donotstash  = 0;
1683   b->colmap      = 0;
1684   b->garray      = 0;
1685   b->roworiented = 1;
1686 
1687   /* stuff used for matrix vector multiply */
1688   b->lvec      = 0;
1689   b->Mvctx     = 0;
1690 
1691   /* stuff for MatGetRow() */
1692   b->rowindices   = 0;
1693   b->rowvalues    = 0;
1694   b->getrowactive = PETSC_FALSE;
1695 
1696   *A = B;
1697   PetscFunctionReturn(0);
1698 }
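
/*
   Editor's note: a minimal, hypothetical create/assemble cycle (not from the
   original source) for the routine above; each process sets its own rows, and
   any off-process entries would be stashed and communicated during assembly.
*/
#if 0
  Mat    A;
  int    ierr,i,rstart,rend,M = 100,N = 100;
  Scalar v = 1.0;

  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                         1,PETSC_NULL,0,PETSC_NULL,&A); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {   /* a diagonal matrix: one entry per row */
    ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
#endif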
1699 
1700 #undef __FUNC__
1701 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1702 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1703 {
1704   Mat        mat;
1705   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1706   int        ierr, len=0, flg;
1707 
1708   PetscFunctionBegin;
1709   *newmat       = 0;
1710   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1711   PLogObjectCreate(mat);
1712   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1713   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1714   mat->destroy    = MatDestroy_MPIAIJ;
1715   mat->view       = MatView_MPIAIJ;
1716   mat->factor     = matin->factor;
1717   mat->assembled  = PETSC_TRUE;
1718 
1719   a->m = mat->m   = oldmat->m;
1720   a->n = mat->n   = oldmat->n;
1721   a->M = mat->M   = oldmat->M;
1722   a->N = mat->N   = oldmat->N;
1723 
1724   a->rstart       = oldmat->rstart;
1725   a->rend         = oldmat->rend;
1726   a->cstart       = oldmat->cstart;
1727   a->cend         = oldmat->cend;
1728   a->size         = oldmat->size;
1729   a->rank         = oldmat->rank;
1730   mat->insertmode = NOT_SET_VALUES;
1731   a->rowvalues    = 0;
1732   a->getrowactive = PETSC_FALSE;
1733 
1734   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1735   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1736   a->cowners = a->rowners + a->size + 2;
1737   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1738   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1739   if (oldmat->colmap) {
1740     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1741     PLogObjectMemory(mat,(a->N)*sizeof(int));
1742     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1743   } else a->colmap = 0;
1744   if (oldmat->garray) {
1745     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1746     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1747     PLogObjectMemory(mat,len*sizeof(int));
1748     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1749   } else a->garray = 0;
1750 
1751   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1752   PLogObjectParent(mat,a->lvec);
1753   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1754   PLogObjectParent(mat,a->Mvctx);
1755   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1756   PLogObjectParent(mat,a->A);
1757   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1758   PLogObjectParent(mat,a->B);
1759   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1760   if (flg) {
1761     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1762   }
1763   *newmat = mat;
1764   PetscFunctionReturn(0);
1765 }
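
/*
   Editor's note: an illustrative sketch (not from the original source). This
   routine backs MatConvert() with MATSAME for MPIAIJ matrices, i.e. a full
   deep copy, exactly as used internally above for the A and B blocks.
*/
#if 0
  Mat A,Acopy;
  int ierr;

  ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);
#endif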
1766 
1767 #include "sys.h"
1768 
1769 #undef __FUNC__
1770 #define __FUNC__ "MatLoad_MPIAIJ"
1771 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1772 {
1773   Mat          A;
1774   Scalar       *vals,*svals;
1775   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1776   MPI_Status   status;
1777   int          i, nz, ierr, j,rstart, rend, fd;
1778   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1779   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1780   int          tag = ((PetscObject)viewer)->tag;
1781 
1782   PetscFunctionBegin;
1783   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1784   if (!rank) {
1785     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1786     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1787     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not a matrix object");
1788     if (header[3] < 0) {
1789       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1790     }
1791   }
1792 
1793 
1794   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1795   M = header[1]; N = header[2];
1796   /* determine ownership of all rows */
1797   m = M/size + ((M % size) > rank);
1798   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1799   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1800   rowners[0] = 0;
1801   for ( i=2; i<=size; i++ ) {
1802     rowners[i] += rowners[i-1];
1803   }
1804   rstart = rowners[rank];
1805   rend   = rowners[rank+1];
1806 
1807   /* distribute row lengths to all processors */
1808   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1809   offlens = ourlens + (rend-rstart);
1810   if (!rank) {
1811     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1812     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1813     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1814     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1815     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1816     PetscFree(sndcounts);
1817   } else {
1818     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1819   }
1820 
1821   if (!rank) {
1822     /* calculate the number of nonzeros on each processor */
1823     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1824     PetscMemzero(procsnz,size*sizeof(int));
1825     for ( i=0; i<size; i++ ) {
1826       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1827         procsnz[i] += rowlengths[j];
1828       }
1829     }
1830     PetscFree(rowlengths);
1831 
1832     /* determine max buffer needed and allocate it */
1833     maxnz = 0;
1834     for ( i=0; i<size; i++ ) {
1835       maxnz = PetscMax(maxnz,procsnz[i]);
1836     }
1837     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1838 
1839     /* read in my part of the matrix column indices  */
1840     nz     = procsnz[0];
1841     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1842     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1843 
1844     /* read in everyone else's parts and ship them off */
1845     for ( i=1; i<size; i++ ) {
1846       nz   = procsnz[i];
1847       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1848       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1849     }
1850     PetscFree(cols);
1851   } else {
1852     /* determine buffer space needed for message */
1853     nz = 0;
1854     for ( i=0; i<m; i++ ) {
1855       nz += ourlens[i];
1856     }
1857     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1858 
1859     /* receive message of column indices */
1860     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1861     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1862     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
1863   }
1864 
1865   /* loop over local rows, determining the number of off-diagonal entries */
1866   PetscMemzero(offlens,m*sizeof(int));
1867   jj = 0;
1868   for ( i=0; i<m; i++ ) {
1869     for ( j=0; j<ourlens[i]; j++ ) {
1870       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1871       jj++;
1872     }
1873   }
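
  /*
     Editor's note (illustrative, not in the original source): on a process
     with rstart=4 and rend=8, a local row with column indices {1,5,9} gets
     offlens[i] = 2 (columns 1 and 9) and, after the subtraction below,
     ourlens[i] = 1 for the diagonal-block preallocation.
  */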
1874 
1875   /* create our matrix */
1876   for ( i=0; i<m; i++ ) {
1877     ourlens[i] -= offlens[i];
1878   }
1879   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1880   A = *newmat;
1881   MatSetOption(A,MAT_COLUMNS_SORTED);
1882   for ( i=0; i<m; i++ ) {
1883     ourlens[i] += offlens[i];
1884   }
1885 
1886   if (!rank) {
1887     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1888 
1889     /* read in my part of the matrix numerical values  */
1890     nz = procsnz[0];
1891     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1892 
1893     /* insert into matrix */
1894     jj      = rstart;
1895     smycols = mycols;
1896     svals   = vals;
1897     for ( i=0; i<m; i++ ) {
1898       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1899       smycols += ourlens[i];
1900       svals   += ourlens[i];
1901       jj++;
1902     }
1903 
1904     /* read in the other processors' parts and ship them out */
1905     for ( i=1; i<size; i++ ) {
1906       nz   = procsnz[i];
1907       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1908       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1909     }
1910     PetscFree(procsnz);
1911   } else {
1912     /* receive numeric values */
1913     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1914 
1915     /* receive message of values */
1916     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1917     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1918     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
1919 
1920     /* insert into matrix */
1921     jj      = rstart;
1922     smycols = mycols;
1923     svals   = vals;
1924     for ( i=0; i<m; i++ ) {
1925       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1926       smycols += ourlens[i];
1927       svals   += ourlens[i];
1928       jj++;
1929     }
1930   }
1931   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1932 
1933   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1934   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
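
/*
   Editor's note: a hedged usage sketch (not from the original source; the
   2.0-era binary viewer interface is assumed) of loading a matrix file
   through the routine above:
*/
#if 0
  Viewer viewer;
  Mat    A;
  int    ierr;

  ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
  ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
#endif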
1937 
1938 #undef __FUNC__
1939 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1940 /*
1941     Not great since it makes two copies of the submatrix: first a SeqAIJ
1942   in local, and then the end result formed by concatenating the local
1943   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1944 */
1945 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1946 {
1947   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1948   Mat        *local,M;
1949   Scalar     *vwork,*aa;
1950   MPI_Comm   comm = mat->comm;
1951   Mat_SeqAIJ *aij;
1952   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1953 
1954   PetscFunctionBegin;
1955   MPI_Comm_rank(comm,&rank);
1956   MPI_Comm_size(comm,&size);
1957 
1958   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1959 
1960   /*
1961       m - number of local rows
1962       n - number of columns (same on all processors)
1963       rstart - first row in new global matrix generated
1964   */
1965   ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
1966   if (call == MAT_INITIAL_MATRIX) {
1967     aij = (Mat_SeqAIJ *) (*local)->data;
1968     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
1969     ii  = aij->i;
1970     jj  = aij->j;
1971 
1972     /*
1973         Determine the number of non-zeros in the diagonal and off-diagonal
1974         portions of the matrix in order to do correct preallocation
1975     */
1976 
1977     /* first get start and end of "diagonal" columns */
1978     if (csize == PETSC_DECIDE) {
1979       nlocal = n/size + ((n % size) > rank);
1980     } else {
1981       nlocal = csize;
1982     }
1983     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1984     rstart = rend - nlocal;
1985     if (rank == size - 1 && rend != n) {
1986       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
1987     }
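
    /*
       Editor's note (illustrative, not in the original source): MPI_Scan
       returns an inclusive prefix sum, so nlocal = 4,3,3 on ranks 0,1,2
       gives rend = 4,7,10 and hence rstart = 0,4,7; on the last rank rend
       must equal n.
    */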
1988 
1989     /* next, compute all the lengths */
1990     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
1991     olens = dlens + m;
1992     for ( i=0; i<m; i++ ) {
1993       jend = ii[i+1] - ii[i];
1994       olen = 0;
1995       dlen = 0;
1996       for ( j=0; j<jend; j++ ) {
1997         if ( *jj < rstart || *jj >= rend) olen++;
1998         else dlen++;
1999         jj++;
2000       }
2001       olens[i] = olen;
2002       dlens[i] = dlen;
2003     }
2004     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2005     PetscFree(dlens);
2006   } else {
2007     int ml,nl;
2008 
2009     M = *newmat;
2010     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2011     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2012     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2013   }
2014   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2015   aij = (Mat_SeqAIJ *) (*local)->data;
2016   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2017   ii  = aij->i;
2018   jj  = aij->j;
2019   aa  = aij->a;
2020   for (i=0; i<m; i++) {
2021     row   = rstart + i;
2022     nz    = ii[i+1] - ii[i];
2023     cwork = jj;     jj += nz;
2024     vwork = aa;     aa += nz;
2025     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2026   }
2027 
2028   ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
2029   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2030   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2031   *newmat = M;
2032   PetscFunctionReturn(0);
2033 }
2034