xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 71be4e95bfd610b7af581f57119f0c53d0f194b7)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.224 1997/11/03 04:45:30 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global number of columns.
14 */
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   PetscFunctionBegin;
24   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
25   PLogObjectMemory(mat,aij->N*sizeof(int));
26   PetscMemzero(aij->colmap,aij->N*sizeof(int));
27   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
28   PetscFunctionReturn(0);
29 }
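
/*
   For illustration: once CreateColmap_MPIAIJ_Private() has been called, a global
   column number gcol is translated into a local column index in the off-diagonal
   block with

       col = aij->colmap[gcol] - 1;        (col < 0 means gcol is not stored locally)

   The +1/-1 offset lets a zero entry in colmap mean "column not present", which is
   how MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below use it.
*/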
30 
31 extern int DisAssemble_MPIAIJ(Mat);
32 
33 #define CHUNKSIZE   15
34 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
35 { \
36  \
37     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
38     rmax = aimax[row]; nrow = ailen[row];  \
39     col1 = col - shift; \
40      \
41     low = 0; high = nrow; \
42     while (high-low > 5) { \
43       t = (low+high)/2; \
44       if (rp[t] > col) high = t; \
45       else             low  = t; \
46     } \
47       for ( _i=0; _i<nrow; _i++ ) { \
48         if (rp[_i] > col1) break; \
49         if (rp[_i] == col1) { \
50           if (addv == ADD_VALUES) ap[_i] += value;   \
51           else                  ap[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (nonew == 1) goto a_noinsert; \
56       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
57       if (nrow >= rmax) { \
58         /* there is no extra room in row, therefore enlarge */ \
59         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
60         Scalar *new_a; \
61  \
62         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
63  \
64         /* malloc new storage space */ \
65         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
66         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
67         new_j   = (int *) (new_a + new_nz); \
68         new_i   = new_j + new_nz; \
69  \
70         /* copy over old data into new slots */ \
71         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
72         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
73         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
74         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
75         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
76                                                            len*sizeof(int)); \
77         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
78         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
79                                                            len*sizeof(Scalar));  \
80         /* free up old matrix storage */ \
81  \
82         PetscFree(a->a);  \
83         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
84         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
85         a->singlemalloc = 1; \
86  \
87         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
88         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
89         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
90         a->maxnz += CHUNKSIZE; \
91         a->reallocs++; \
92       } \
93       N = nrow++ - 1; a->nz++; \
94       /* shift up all the later entries in this row */ \
95       for ( ii=N; ii>=_i; ii-- ) { \
96         rp[ii+1] = rp[ii]; \
97         ap[ii+1] = ap[ii]; \
98       } \
99       rp[_i] = col1;  \
100       ap[_i] = value;  \
101       a_noinsert: ; \
102       ailen[row] = nrow; \
103 }
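
/*
   Sketch of what the macro above (and its B-block twin below) does for a single
   (row,col,value) entry:
     1) point rp/ap at the row's column indices and values: rp = aj + ai[row] + shift;
     2) scan the row for an existing entry with column col; if found, either add or
        overwrite the value (depending on addv) and jump to the noinsert label;
     3) otherwise, if new nonzeros are permitted, grow the storage by CHUNKSIZE
        entries when the row is full, shift the later entries of the row up one
        slot to keep the columns sorted, and store the new (col,value) pair.
   The `shift' variable accounts for the optional one-based (Fortran) index style.
*/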
104 
105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
106 { \
107  \
108     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
109     rmax = bimax[row]; nrow = bilen[row];  \
110     col1 = col - shift; \
111      \
112     low = 0; high = nrow; \
113     while (high-low > 5) { \
114       t = (low+high)/2; \
115       if (rp[t] > col) high = t; \
116       else             low  = t; \
117     } \
118        for ( _i=0; _i<nrow; _i++ ) { \
119         if (rp[_i] > col1) break; \
120         if (rp[_i] == col1) { \
121           if (addv == ADD_VALUES) ap[_i] += value;   \
122           else                  ap[_i] = value; \
123           goto b_noinsert; \
124         } \
125       }  \
126       if (nonew == 1) goto b_noinsert; \
127       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
128       if (nrow >= rmax) { \
129         /* there is no extra room in row, therefore enlarge */ \
130         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
131         Scalar *new_a; \
132  \
133         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
134  \
135         /* malloc new storage space */ \
136         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
137         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
138         new_j   = (int *) (new_a + new_nz); \
139         new_i   = new_j + new_nz; \
140  \
141         /* copy over old data into new slots */ \
142         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
143         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
144         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
145         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
146         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
147                                                            len*sizeof(int)); \
148         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
149         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
150                                                            len*sizeof(Scalar));  \
151         /* free up old matrix storage */ \
152  \
153         PetscFree(b->a);  \
154         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
155         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
156         b->singlemalloc = 1; \
157  \
158         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
159         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
160         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
161         b->maxnz += CHUNKSIZE; \
162         b->reallocs++; \
163       } \
164       N = nrow++ - 1; b->nz++; \
165       /* shift up all the later entries in this row */ \
166       for ( ii=N; ii>=_i; ii-- ) { \
167         rp[ii+1] = rp[ii]; \
168         ap[ii+1] = ap[ii]; \
169       } \
170       rp[_i] = col1;  \
171       ap[_i] = value;  \
172       b_noinsert: ; \
173       bilen[row] = nrow; \
174 }
175 
176 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
177 #undef __FUNC__
178 #define __FUNC__ "MatSetValues_MPIAIJ"
179 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
180 {
181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
182   Scalar     value;
183   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
184   int        cstart = aij->cstart, cend = aij->cend,row,col;
185   int        roworiented = aij->roworiented;
186 
187   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
188   Mat        A = aij->A;
189   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
190   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
191   Scalar     *aa = a->a;
192 
193   Mat        B = aij->B;
194   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
195   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
196   Scalar     *ba = b->a;
197 
198   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
199   int        nonew = a->nonew,shift = a->indexshift;
200   Scalar     *ap;
201 
202   PetscFunctionBegin;
203   for ( i=0; i<m; i++ ) {
204 #if defined(USE_PETSC_BOPT_g)
205     if (im[i] < 0) SETERRQ(1,0,"Negative row");
206     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
207 #endif
208     if (im[i] >= rstart && im[i] < rend) {
209       row = im[i] - rstart;
210       for ( j=0; j<n; j++ ) {
211         if (in[j] >= cstart && in[j] < cend){
212           col = in[j] - cstart;
213           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
214           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
215           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
216         }
217 #if defined(USE_PETSC_BOPT_g)
218         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
219         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
220 #endif
221         else {
222           if (mat->was_assembled) {
223             if (!aij->colmap) {
224               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
225             }
226             col = aij->colmap[in[j]] - 1;
227             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
228               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
229               col =  in[j];
230               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
231               B = aij->B;
232               b = (Mat_SeqAIJ *) B->data;
233               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
234               ba = b->a;
235             }
236           }
237           else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     }
294     else {
295       SETERRQ(1,0,"Only local values currently supported");
296     }
297   }
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   PetscFunctionBegin;
315   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
316   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
317   if (addv == (ADD_VALUES|INSERT_VALUES)) {
318     SETERRQ(1,0,"Some processors inserted others added");
319   }
320   mat->insertmode = addv; /* in case this processor had no cache */
321 
322   /*  first count number of contributors to each processor */
323   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
324   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
325   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
326   for ( i=0; i<aij->stash.n; i++ ) {
327     idx = aij->stash.idx[i];
328     for ( j=0; j<size; j++ ) {
329       if (idx >= owners[j] && idx < owners[j+1]) {
330         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
331       }
332     }
333   }
334   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
335 
336   /* inform other processors of number of messages and max length*/
337   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
338   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
339   nreceives = work[rank];
340   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
341   nmax = work[rank];
342   PetscFree(work);
343 
344   /* post receives:
345        1) each message will consist of ordered triples
346      (global row index, global column index, value); the indices are cast to
347      Scalars to simplify the message passing.
348        2) since we don't know how long each individual message is, we
349      allocate the largest needed buffer for each receive. Potentially
350      this is a lot of wasted space.
351 
352 
353        This could be done better.
354   */
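  /*
     For illustration, the k-th stashed entry bound for a given processor is packed
     into svalues below as three consecutive Scalars,
         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] = value
     and is unpacked in the same layout from rvalues in MatAssemblyEnd_MPIAIJ().
  */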
355   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
357   for ( i=0; i<nreceives; i++ ) {
358     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
359               comm,recv_waits+i);CHKERRQ(ierr);
360   }
361 
362   /* do sends:
363       1) starts[i] gives the starting index in svalues for stuff going to
364          the ith processor
365   */
366   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
367   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
368   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
369   starts[0]  = 0;
370   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
371   for ( i=0; i<aij->stash.n; i++ ) {
372     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
373     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
374     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
375   }
376   PetscFree(owner);
377   starts[0] = 0;
378   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   count = 0;
380   for ( i=0; i<size; i++ ) {
381     if (procs[i]) {
382       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
383     }
384   }
385   PetscFree(starts); PetscFree(nprocs);
386 
387   /* Free cache space */
388   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
389   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
390 
391   aij->svalues    = svalues;    aij->rvalues    = rvalues;
392   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
393   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
394   aij->rmax       = nmax;
395 
396   PetscFunctionReturn(0);
397 }
398 extern int MatSetUpMultiply_MPIAIJ(Mat);
399 
400 #undef __FUNC__
401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
403 {
404   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
405   MPI_Status  *send_status,recv_status;
406   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
407   int         row,col,other_disassembled;
408   Scalar      *values,val;
409   InsertMode  addv = mat->insertmode;
410 
411   PetscFunctionBegin;
412   /*  wait on receives */
413   while (count) {
414     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
415     /* unpack receives into our local space */
416     values = aij->rvalues + 3*imdex*aij->rmax;
417     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
418     n = n/3;
419     for ( i=0; i<n; i++ ) {
420       row = (int) PetscReal(values[3*i]) - aij->rstart;
421       col = (int) PetscReal(values[3*i+1]);
422       val = values[3*i+2];
423       if (col >= aij->cstart && col < aij->cend) {
424         col -= aij->cstart;
425         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
426       } else {
427         if (mat->was_assembled || mat->assembled) {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
430           }
431           col = aij->colmap[col] - 1;
432           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
433             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
434             col = (int) PetscReal(values[3*i+1]);
435           }
436         }
437         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
438       }
439     }
440     count--;
441   }
442   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
443 
444   /* wait on sends */
445   if (aij->nsends) {
446     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
447     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
448     PetscFree(send_status);
449   }
450   PetscFree(aij->send_waits); PetscFree(aij->svalues);
451 
452   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
454 
455   /* determine if any processor has disassembled; if so, we must also
456      disassemble ourselves so that we may reassemble. */
457   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
458   if (mat->was_assembled && !other_disassembled) {
459     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460   }
461 
462   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
463     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
464   }
465   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
467 
468   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatZeroEntries_MPIAIJ"
474 int MatZeroEntries_MPIAIJ(Mat A)
475 {
476   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478 
479   PetscFunctionBegin;
480   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
481   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 /* the code does not do the diagonal entries correctly unless the
486    matrix is square and the column and row ownerships are identical.
487    This is a BUG. The only way to fix it seems to be to access
488    aij->A and aij->B directly and not through the MatZeroRows()
489    routine.
490 */
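
/*
   Outline of the algorithm below: every processor determines the owner of each row
   in the index set and ships those row numbers to the owning processors; each owner
   gathers the rows it receives into lrows (converted to local numbering via `base')
   and then calls MatZeroRows() on its local diagonal block (l->A, keeping `diag')
   and its off-diagonal block (l->B, with a zero diagonal).
*/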
491 #undef __FUNC__
492 #define __FUNC__ "MatZeroRows_MPIAIJ"
493 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
494 {
495   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
496   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
497   int            *procs,*nprocs,j,found,idx,nsends,*work;
498   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
499   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
500   int            *lens,imdex,*lrows,*values;
501   MPI_Comm       comm = A->comm;
502   MPI_Request    *send_waits,*recv_waits;
503   MPI_Status     recv_status,*send_status;
504   IS             istmp;
505 
506   PetscFunctionBegin;
507   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
508   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
509 
510   /*  first count number of contributors to each processor */
511   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
512   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
513   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
514   for ( i=0; i<N; i++ ) {
515     idx = rows[i];
516     found = 0;
517     for ( j=0; j<size; j++ ) {
518       if (idx >= owners[j] && idx < owners[j+1]) {
519         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
520       }
521     }
522     if (!found) SETERRQ(1,0,"Index out of range");
523   }
524   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
525 
526   /* inform other processors of number of messages and max length*/
527   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
528   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
529   nrecvs = work[rank];
530   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
531   nmax = work[rank];
532   PetscFree(work);
533 
534   /* post receives:   */
535   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
536   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
537   for ( i=0; i<nrecvs; i++ ) {
538     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
539   }
540 
541   /* do sends:
542       1) starts[i] gives the starting index in svalues for stuff going to
543          the ith processor
544   */
545   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
546   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
547   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
548   starts[0] = 0;
549   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
550   for ( i=0; i<N; i++ ) {
551     svalues[starts[owner[i]]++] = rows[i];
552   }
553   ISRestoreIndices(is,&rows);
554 
555   starts[0] = 0;
556   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
557   count = 0;
558   for ( i=0; i<size; i++ ) {
559     if (procs[i]) {
560       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
561     }
562   }
563   PetscFree(starts);
564 
565   base = owners[rank];
566 
567   /*  wait on receives */
568   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
569   source = lens + nrecvs;
570   count  = nrecvs; slen = 0;
571   while (count) {
572     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
573     /* unpack receives into our local space */
574     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
575     source[imdex]  = recv_status.MPI_SOURCE;
576     lens[imdex]  = n;
577     slen += n;
578     count--;
579   }
580   PetscFree(recv_waits);
581 
582   /* move the data into the send scatter */
583   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
584   count = 0;
585   for ( i=0; i<nrecvs; i++ ) {
586     values = rvalues + i*nmax;
587     for ( j=0; j<lens[i]; j++ ) {
588       lrows[count++] = values[j] - base;
589     }
590   }
591   PetscFree(rvalues); PetscFree(lens);
592   PetscFree(owner); PetscFree(nprocs);
593 
594   /* actually zap the local rows */
595   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
596   PLogObjectParent(A,istmp);
597   PetscFree(lrows);
598   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
599   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
600   ierr = ISDestroy(istmp); CHKERRQ(ierr);
601 
602   /* wait on sends */
603   if (nsends) {
604     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
605     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
606     PetscFree(send_status);
607   }
608   PetscFree(send_waits); PetscFree(svalues);
609 
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatMult_MPIAIJ"
615 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
618   int        ierr,nt;
619 
620   PetscFunctionBegin;
621   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
622   if (nt != a->n) {
623     SETERRQ(1,0,"Incompatible partition of A and xx");
624   }
625   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
626   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
627   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
628   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
632 #undef __FUNC__
633 #define __FUNC__ "MatMultAdd_MPIAIJ"
634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
635 {
636   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
637   int        ierr;
638 
639   PetscFunctionBegin;
640   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
641   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
642   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
643   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNC__
648 #define __FUNC__ "MatMultTrans_MPIAIJ"
649 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
650 {
651   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
652   int        ierr;
653 
654   PetscFunctionBegin;
655   /* do nondiagonal part */
656   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
657   /* send it on its way */
658   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
659   /* do local part */
660   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
661   /* receive remote parts: note this assumes the values are not actually */
662   /* inserted in yy until the VecScatterEnd() below, which is true for this */
663   /* implementation but may not always hold. */
664   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
673   int        ierr;
674 
675   PetscFunctionBegin;
676   /* do nondiagonal part */
677   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
678   /* send it on its way */
679   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
680   /* do local part */
681   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
682   /* receive remote parts: note this assumes the values are not actually */
683   /* inserted in zz until the VecScatterEnd() below, which is true for this */
684   /* implementation but may not always hold. */
685   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 /*
690   This only works correctly for square matrices where the subblock A->A is the
691    diagonal block
692 */
693 #undef __FUNC__
694 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
695 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
696 {
697   int        ierr;
698   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
699 
700   PetscFunctionBegin;
701   if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
702   if (a->rstart != a->cstart || a->rend != a->cend) {
703     SETERRQ(1,0,"row partition must equal col partition");
704   }
705   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNC__
710 #define __FUNC__ "MatScale_MPIAIJ"
711 int MatScale_MPIAIJ(Scalar *aa,Mat A)
712 {
713   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
714   int        ierr;
715 
716   PetscFunctionBegin;
717   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
718   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNC__
723 #define __FUNC__ "MatDestroy_MPIAIJ"
724 int MatDestroy_MPIAIJ(PetscObject obj)
725 {
726   Mat        mat = (Mat) obj;
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
728   int        ierr;
729 
730   PetscFunctionBegin;
731 #if defined(USE_PETSC_LOG)
732   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
733 #endif
734   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
735   PetscFree(aij->rowners);
736   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
738   if (aij->colmap) PetscFree(aij->colmap);
739   if (aij->garray) PetscFree(aij->garray);
740   if (aij->lvec)   VecDestroy(aij->lvec);
741   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
742   if (aij->rowvalues) PetscFree(aij->rowvalues);
743   PetscFree(aij);
744   PLogObjectDestroy(mat);
745   PetscHeaderDestroy(mat);
746   PetscFunctionReturn(0);
747 }
748 
749 #undef __FUNC__
750 #define __FUNC__ "MatView_MPIAIJ_Binary"
751 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
752 {
753   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
754   int         ierr;
755 
756   PetscFunctionBegin;
757   if (aij->size == 1) {
758     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
759   }
760   else SETERRQ(1,0,"Only uniprocessor output supported");
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNC__
765 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
766 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
767 {
768   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
769   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
770   int         ierr, format,shift = C->indexshift,rank;
771   FILE        *fd;
772   ViewerType  vtype;
773 
774   PetscFunctionBegin;
775   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
776   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
777     ierr = ViewerGetFormat(viewer,&format);
778     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
779       MatInfo info;
780       int     flg;
781       MPI_Comm_rank(mat->comm,&rank);
782       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
783       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
784       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
785       PetscSequentialPhaseBegin(mat->comm,1);
786       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
787          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
788       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
789          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
790       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
791       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
792       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
793       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
794       fflush(fd);
795       PetscSequentialPhaseEnd(mat->comm,1);
796       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
797       PetscFunctionReturn(0);
798     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
799       PetscFunctionReturn(0);
800     }
801   }
802 
803   if (vtype == DRAW_VIEWER) {
804     Draw       draw;
805     PetscTruth isnull;
806     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
807     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
808   }
809 
810   if (vtype == ASCII_FILE_VIEWER) {
811     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
812     PetscSequentialPhaseBegin(mat->comm,1);
813     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
814            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
815            aij->cend);
816     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
817     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
818     fflush(fd);
819     PetscSequentialPhaseEnd(mat->comm,1);
820   } else {
821     int size = aij->size;
822     rank = aij->rank;
823     if (size == 1) {
824       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
825     } else {
826       /* assemble the entire matrix onto first processor. */
827       Mat         A;
828       Mat_SeqAIJ *Aloc;
829       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
830       Scalar      *a;
831 
832       if (!rank) {
833         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
834       } else {
835         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
836       }
837       PLogObjectParent(mat,A);
838 
839       /* copy over the A part */
840       Aloc = (Mat_SeqAIJ*) aij->A->data;
841       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
842       row = aij->rstart;
843       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
844       for ( i=0; i<m; i++ ) {
845         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
846         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
847       }
848       aj = Aloc->j;
849       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
850 
851       /* copy over the B part */
852       Aloc = (Mat_SeqAIJ*) aij->B->data;
853       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
854       row = aij->rstart;
855       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
856       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
857       for ( i=0; i<m; i++ ) {
858         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
859         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
860       }
861       PetscFree(ct);
862       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
863       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
864       /*
865          Everyone has to participate in drawing the matrix, since the graphics
866          waits are synchronized across all processors that share the Draw object
867       */
868       if (!rank || vtype == DRAW_VIEWER) {
869         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
870       }
871       ierr = MatDestroy(A); CHKERRQ(ierr);
872     }
873   }
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNC__
878 #define __FUNC__ "MatView_MPIAIJ"
879 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
880 {
881   Mat         mat = (Mat) obj;
882   int         ierr;
883   ViewerType  vtype;
884 
885   PetscFunctionBegin;
886   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
887   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
888       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
889     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
890   }
891   else if (vtype == BINARY_FILE_VIEWER) {
892     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
893   }
894   PetscFunctionReturn(0);
895 }
896 
897 /*
898     This has to provide several versions.
899 
900      2) a) use only local smoothing, updating outer values only once.
901         b) local smoothing, updating outer values each inner iteration
902      3) coloring, updating outer values between colors.
903 */
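
/*
   The local sweeps below apply the standard SOR update (assuming SPARSEDENSEMDOT()
   subtracts the sparse row dot product from `sum'):

       x_i <- (1 - omega) x_i + omega (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift)

   where the off-processor portion of each row (the B block) uses the ghost values
   gathered into mat->lvec by the VecScatter calls before the sweeps.
*/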
904 #undef __FUNC__
905 #define __FUNC__ "MatRelax_MPIAIJ"
906 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
907                            double fshift,int its,Vec xx)
908 {
909   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
910   Mat        AA = mat->A, BB = mat->B;
911   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
912   Scalar     *b,*x,*xs,*ls,d,*v,sum;
913   int        ierr,*idx, *diag;
914   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
915 
916   PetscFunctionBegin;
917   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
918   xs = x + shift; /* shift by one for index start of 1 */
919   ls = ls + shift;
920   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
921   diag = A->diag;
922   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
923     if (flag & SOR_ZERO_INITIAL_GUESS) {
924       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
925       PetscFunctionReturn(0);
926     }
927     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
928     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
929     while (its--) {
930       /* go down through the rows */
931       for ( i=0; i<m; i++ ) {
932         n    = A->i[i+1] - A->i[i];
933         idx  = A->j + A->i[i] + shift;
934         v    = A->a + A->i[i] + shift;
935         sum  = b[i];
936         SPARSEDENSEMDOT(sum,xs,v,idx,n);
937         d    = fshift + A->a[diag[i]+shift];
938         n    = B->i[i+1] - B->i[i];
939         idx  = B->j + B->i[i] + shift;
940         v    = B->a + B->i[i] + shift;
941         SPARSEDENSEMDOT(sum,ls,v,idx,n);
942         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
943       }
944       /* come up through the rows */
945       for ( i=m-1; i>-1; i-- ) {
946         n    = A->i[i+1] - A->i[i];
947         idx  = A->j + A->i[i] + shift;
948         v    = A->a + A->i[i] + shift;
949         sum  = b[i];
950         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951         d    = fshift + A->a[diag[i]+shift];
952         n    = B->i[i+1] - B->i[i];
953         idx  = B->j + B->i[i] + shift;
954         v    = B->a + B->i[i] + shift;
955         SPARSEDENSEMDOT(sum,ls,v,idx,n);
956         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
957       }
958     }
959   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
960     if (flag & SOR_ZERO_INITIAL_GUESS) {
961       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
962       PetscFunctionReturn(0);
963     }
964     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
965     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
966     while (its--) {
967       for ( i=0; i<m; i++ ) {
968         n    = A->i[i+1] - A->i[i];
969         idx  = A->j + A->i[i] + shift;
970         v    = A->a + A->i[i] + shift;
971         sum  = b[i];
972         SPARSEDENSEMDOT(sum,xs,v,idx,n);
973         d    = fshift + A->a[diag[i]+shift];
974         n    = B->i[i+1] - B->i[i];
975         idx  = B->j + B->i[i] + shift;
976         v    = B->a + B->i[i] + shift;
977         SPARSEDENSEMDOT(sum,ls,v,idx,n);
978         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
979       }
980     }
981   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
982     if (flag & SOR_ZERO_INITIAL_GUESS) {
983       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     }
986     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
987                             mat->Mvctx); CHKERRQ(ierr);
988     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
989                             mat->Mvctx); CHKERRQ(ierr);
990     while (its--) {
991       for ( i=m-1; i>-1; i-- ) {
992         n    = A->i[i+1] - A->i[i];
993         idx  = A->j + A->i[i] + shift;
994         v    = A->a + A->i[i] + shift;
995         sum  = b[i];
996         SPARSEDENSEMDOT(sum,xs,v,idx,n);
997         d    = fshift + A->a[diag[i]+shift];
998         n    = B->i[i+1] - B->i[i];
999         idx  = B->j + B->i[i] + shift;
1000         v    = B->a + B->i[i] + shift;
1001         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1002         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1003       }
1004     }
1005   } else {
1006     SETERRQ(1,0,"Option not supported");
1007   }
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNC__
1012 #define __FUNC__ "MatGetInfo_MPIAIJ"
1013 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1014 {
1015   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1016   Mat        A = mat->A, B = mat->B;
1017   int        ierr;
1018   double     isend[5], irecv[5];
1019 
1020   PetscFunctionBegin;
1021   info->rows_global    = (double)mat->M;
1022   info->columns_global = (double)mat->N;
1023   info->rows_local     = (double)mat->m;
1024   info->columns_local  = (double)mat->N;
1025   info->block_size     = 1.0;
1026   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1027   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1028   isend[3] = info->memory;  isend[4] = info->mallocs;
1029   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1030   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1031   isend[3] += info->memory;  isend[4] += info->mallocs;
1032   if (flag == MAT_LOCAL) {
1033     info->nz_used      = isend[0];
1034     info->nz_allocated = isend[1];
1035     info->nz_unneeded  = isend[2];
1036     info->memory       = isend[3];
1037     info->mallocs      = isend[4];
1038   } else if (flag == MAT_GLOBAL_MAX) {
1039     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1040     info->nz_used      = irecv[0];
1041     info->nz_allocated = irecv[1];
1042     info->nz_unneeded  = irecv[2];
1043     info->memory       = irecv[3];
1044     info->mallocs      = irecv[4];
1045   } else if (flag == MAT_GLOBAL_SUM) {
1046     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1047     info->nz_used      = irecv[0];
1048     info->nz_allocated = irecv[1];
1049     info->nz_unneeded  = irecv[2];
1050     info->memory       = irecv[3];
1051     info->mallocs      = irecv[4];
1052   }
1053   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1054   info->fill_ratio_needed = 0;
1055   info->factor_mallocs    = 0;
1056 
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNC__
1061 #define __FUNC__ "MatSetOption_MPIAIJ"
1062 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1063 {
1064   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1065 
1066   PetscFunctionBegin;
1067   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1068       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1069       op == MAT_COLUMNS_UNSORTED ||
1070       op == MAT_COLUMNS_SORTED ||
1071       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1072       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1073         MatSetOption(a->A,op);
1074         MatSetOption(a->B,op);
1075   } else if (op == MAT_ROW_ORIENTED) {
1076     a->roworiented = 1;
1077     MatSetOption(a->A,op);
1078     MatSetOption(a->B,op);
1079   } else if (op == MAT_ROWS_SORTED ||
1080              op == MAT_ROWS_UNSORTED ||
1081              op == MAT_SYMMETRIC ||
1082              op == MAT_STRUCTURALLY_SYMMETRIC ||
1083              op == MAT_YES_NEW_DIAGONALS)
1084     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1085   else if (op == MAT_COLUMN_ORIENTED) {
1086     a->roworiented = 0;
1087     MatSetOption(a->A,op);
1088     MatSetOption(a->B,op);
1089   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1090     a->donotstash = 1;
1091   } else if (op == MAT_NO_NEW_DIAGONALS){
1092     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1093   } else {
1094     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatGetSize_MPIAIJ"
1101 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1104 
1105   PetscFunctionBegin;
1106   if (m) *m = mat->M;
1107   if (n) *n = mat->N;
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNC__
1112 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1113 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1114 {
1115   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1116 
1117   PetscFunctionBegin;
1118   if (m) *m = mat->m;
1119   if (n) *n = mat->N;
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1125 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1126 {
1127   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1128 
1129   PetscFunctionBegin;
1130   *m = mat->rstart; *n = mat->rend;
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1135 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1136 
1137 #undef __FUNC__
1138 #define __FUNC__ "MatGetRow_MPIAIJ"
1139 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1140 {
1141   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1142   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1143   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1144   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1145   int        *cmap, *idx_p;
1146 
1147   PetscFunctionBegin;
1148   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1149   mat->getrowactive = PETSC_TRUE;
1150 
1151   if (!mat->rowvalues && (idx || v)) {
1152     /*
1153         allocate enough space to hold information from the longest row.
1154     */
1155     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1156     int     max = 1,m = mat->m,tmp;
1157     for ( i=0; i<m; i++ ) {
1158       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1159       if (max < tmp) { max = tmp; }
1160     }
1161     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1162     CHKPTRQ(mat->rowvalues);
1163     mat->rowindices = (int *) (mat->rowvalues + max);
1164   }
1165 
1166   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1167   lrow = row - rstart;
1168 
1169   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1170   if (!v)   {pvA = 0; pvB = 0;}
1171   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1172   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1173   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1174   nztot = nzA + nzB;
1175 
1176   cmap  = mat->garray;
1177   if (v  || idx) {
1178     if (nztot) {
1179       /* Sort by increasing column numbers, assuming A and B already sorted */
1180       int imark = -1;
1181       if (v) {
1182         *v = v_p = mat->rowvalues;
1183         for ( i=0; i<nzB; i++ ) {
1184           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1185           else break;
1186         }
1187         imark = i;
1188         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1189         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1190       }
1191       if (idx) {
1192         *idx = idx_p = mat->rowindices;
1193         if (imark > -1) {
1194           for ( i=0; i<imark; i++ ) {
1195             idx_p[i] = cmap[cworkB[i]];
1196           }
1197         } else {
1198           for ( i=0; i<nzB; i++ ) {
1199             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1200             else break;
1201           }
1202           imark = i;
1203         }
1204         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1205         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1206       }
1207     }
1208     else {
1209       if (idx) *idx = 0;
1210       if (v)   *v   = 0;
1211     }
1212   }
1213   *nz = nztot;
1214   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1215   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNC__
1220 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1221 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1222 {
1223   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1224 
1225   PetscFunctionBegin;
1226   if (aij->getrowactive == PETSC_FALSE) {
1227     SETERRQ(1,0,"MatGetRow not called");
1228   }
1229   aij->getrowactive = PETSC_FALSE;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNC__
1234 #define __FUNC__ "MatNorm_MPIAIJ"
1235 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1236 {
1237   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1238   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1239   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1240   double     sum = 0.0;
1241   Scalar     *v;
1242 
1243   PetscFunctionBegin;
1244   if (aij->size == 1) {
1245     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1246   } else {
1247     if (type == NORM_FROBENIUS) {
1248       v = amat->a;
1249       for (i=0; i<amat->nz; i++ ) {
1250 #if defined(USE_PETSC_COMPLEX)
1251         sum += real(conj(*v)*(*v)); v++;
1252 #else
1253         sum += (*v)*(*v); v++;
1254 #endif
1255       }
1256       v = bmat->a;
1257       for (i=0; i<bmat->nz; i++ ) {
1258 #if defined(USE_PETSC_COMPLEX)
1259         sum += real(conj(*v)*(*v)); v++;
1260 #else
1261         sum += (*v)*(*v); v++;
1262 #endif
1263       }
1264       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1265       *norm = sqrt(*norm);
1266     } else if (type == NORM_1) { /* max column norm */
1267       double *tmp, *tmp2;
1268       int    *jj, *garray = aij->garray;
1269       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1270       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1271       PetscMemzero(tmp,aij->N*sizeof(double));
1272       *norm = 0.0;
1273       v = amat->a; jj = amat->j;
1274       for ( j=0; j<amat->nz; j++ ) {
1275         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1276       }
1277       v = bmat->a; jj = bmat->j;
1278       for ( j=0; j<bmat->nz; j++ ) {
1279         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1280       }
1281       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1282       for ( j=0; j<aij->N; j++ ) {
1283         if (tmp2[j] > *norm) *norm = tmp2[j];
1284       }
1285       PetscFree(tmp); PetscFree(tmp2);
1286     } else if (type == NORM_INFINITY) { /* max row norm */
1287       double ntemp = 0.0;
1288       for ( j=0; j<amat->m; j++ ) {
1289         v = amat->a + amat->i[j] + shift;
1290         sum = 0.0;
1291         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1292           sum += PetscAbsScalar(*v); v++;
1293         }
1294         v = bmat->a + bmat->i[j] + shift;
1295         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1296           sum += PetscAbsScalar(*v); v++;
1297         }
1298         if (sum > ntemp) ntemp = sum;
1299       }
1300       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1301     } else {
1302       SETERRQ(1,0,"No support for two norm");
1303     }
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNC__
1309 #define __FUNC__ "MatTranspose_MPIAIJ"
1310 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1311 {
1312   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1313   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1314   int        ierr,shift = Aloc->indexshift;
1315   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1316   Mat        B;
1317   Scalar     *array;
1318 
1319   PetscFunctionBegin;
1320   if (matout == PETSC_NULL && M != N) {
1321     SETERRQ(1,0,"Square matrix only for in-place");
1322   }
1323 
1324   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1325 
1326   /* copy over the A part */
1327   Aloc = (Mat_SeqAIJ*) a->A->data;
1328   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1329   row = a->rstart;
1330   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1331   for ( i=0; i<m; i++ ) {
1332     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1333     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1334   }
1335   aj = Aloc->j;
1336   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1337 
1338   /* copy over the B part */
1339   Aloc = (Mat_SeqAIJ*) a->B->data;
1340   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1341   row = a->rstart;
1342   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1343   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1344   for ( i=0; i<m; i++ ) {
1345     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1346     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1347   }
1348   PetscFree(ct);
1349   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1350   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1351   if (matout != PETSC_NULL) {
1352     *matout = B;
1353   } else {
1354     /* This isn't really an in-place transpose, but we free the data structures from a */
1355     PetscFree(a->rowners);
1356     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1357     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1358     if (a->colmap) PetscFree(a->colmap);
1359     if (a->garray) PetscFree(a->garray);
1360     if (a->lvec) VecDestroy(a->lvec);
1361     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1362     PetscFree(a);
1363     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1364     PetscHeaderDestroy(B);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNC__
1370 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1371 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1372 {
1373   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1374   Mat a = aij->A, b = aij->B;
1375   int ierr,s1,s2,s3;
1376 
1377   PetscFunctionBegin;
1378   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1379   if (rr) {
1380     s3 = aij->n;
1381     VecGetLocalSize_Fast(rr,s1);
1382     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1383     /* Overlap communication with computation. */
1384     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1385   }
1386   if (ll) {
1387     VecGetLocalSize_Fast(ll,s1);
1388     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1389     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1390   }
1391   /* scale  the diagonal block */
1392   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1393 
1394   if (rr) {
1395     /* Do a scatter end and then right scale the off-diagonal block */
1396     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1397     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1398   }
1399 
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 
1404 extern int MatPrintHelp_SeqAIJ(Mat);
1405 #undef __FUNC__
1406 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1407 int MatPrintHelp_MPIAIJ(Mat A)
1408 {
1409   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1410   int        ierr;
1411 
1412   PetscFunctionBegin;
1413   if (!a->rank) {
1414     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1415   }
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNC__
1420 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1421 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1422 {
1423   PetscFunctionBegin;
1424   *bs = 1;
1425   PetscFunctionReturn(0);
1426 }
1427 #undef __FUNC__
1428 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1429 int MatSetUnfactored_MPIAIJ(Mat A)
1430 {
1431   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1432   int        ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNC__
1440 #define __FUNC__ "MatEqual_MPIAIJ"
1441 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1442 {
1443   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1444   Mat        a, b, c, d;
1445   PetscTruth flg;
1446   int        ierr;
1447 
1448   PetscFunctionBegin;
1449   if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
1450   a = matA->A; b = matA->B;
1451   c = matB->A; d = matB->B;
1452 
1453   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1454   if (flg == PETSC_TRUE) {
1455     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1456   }
1457   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1462 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1463 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1464 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1465 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);
1466 
1467 /* -------------------------------------------------------------------*/
1468 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1469        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1470        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1471        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1472        0,0,
1473        0,0,
1474        0,0,
1475        MatRelax_MPIAIJ,
1476        MatTranspose_MPIAIJ,
1477        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1478        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1479        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1480        0,
1481        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1482        0,0,0,0,
1483        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1484        0,0,
1485        0,0,MatConvertSameType_MPIAIJ,0,0,
1486        0,0,0,
1487        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1488        MatPrintHelp_MPIAIJ,
1489        MatScale_MPIAIJ,0,0,0,
1490        MatGetBlockSize_MPIAIJ,0,0,0,0,
1491        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1492        0,0,MatGetSubMatrix_MPIAIJ};
1493 
1494 
1495 #undef __FUNC__
1496 #define __FUNC__ "MatCreateMPIAIJ"
1497 /*@C
1498    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1499    (the default parallel PETSc format).  For good matrix assembly performance
1500    the user should preallocate the matrix storage by setting the parameters
1501    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1502    performance can be increased by more than a factor of 50.
1503 
1504    Input Parameters:
1505 .  comm - MPI communicator
1506 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1507            This value should be the same as the local size used in creating the
1508            y vector for the matrix-vector product y = Ax.
1509 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1510        This value should be the same as the local size used in creating the
1511        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1512 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1513 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1514 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1515            (same for all local rows)
1516 .  d_nnz - array containing the number of nonzeros in the various rows of the
1517            diagonal portion of the local submatrix (possibly different for each row)
1518            or PETSC_NULL. You must leave room for the diagonal entry even if
1519            it is zero.
1520 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1521            submatrix (same for all local rows).
1522 .  o_nnz - array containing the number of nonzeros in the various rows of the
1523            off-diagonal portion of the local submatrix (possibly different for
1524            each row) or PETSC_NULL.
1525 
1526    Output Parameter:
1527 .  A - the matrix
1528 
1529    Notes:
1530    The AIJ format (also called the Yale sparse matrix format or
1531    compressed row storage) is fully compatible with standard Fortran 77
1532    storage.  That is, the stored row and column indices can begin at
1533    either one (as in Fortran) or zero.  See the users manual for details.
1534 
1535    The user MUST specify either the local or global matrix dimensions
1536    (possibly both).
1537 
1538    By default, this format uses inodes (identical nodes) when possible.
1539    We search for consecutive rows with the same nonzero structure, thereby
1540    reusing matrix information to achieve increased efficiency.
1541 
1542    Options Database Keys:
1543 $    -mat_aij_no_inode  - Do not use inodes
1544 $    -mat_aij_inode_limit <limit> - Set inode limit.
1545 $        (max limit=5)
1546 $    -mat_aij_oneindex - Internally use indexing starting at 1
1547 $        rather than 0.  Note: When calling MatSetValues(),
1548 $        the user still MUST index entries starting at 0!
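
   For example, a hypothetical run could disable inodes and switch to
   one-based internal indexing with
$       mpirun -np 4 ./myprog -mat_aij_no_inode -mat_aij_oneindex
   (the program name and process count here are illustrative only).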
1549 
1550    Storage Information:
1551    For a square global matrix we define each processor's diagonal portion
1552    to be its local rows and the corresponding columns (a square submatrix);
1553    each processor's off-diagonal portion encompasses the remainder of the
1554    local matrix (a rectangular submatrix).
1555 
1556    The user can specify preallocated storage for the diagonal part of
1557    the local submatrix with either d_nz or d_nnz (not both).  Set
1558    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1559    memory allocation.  Likewise, specify preallocated storage for the
1560    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1561 
1562    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1563    the figure below we depict these three local rows and all columns (0-11).
1564 
1565 $          0 1 2 3 4 5 6 7 8 9 10 11
1566 $         -------------------
1567 $  row 3  |  o o o d d d o o o o o o
1568 $  row 4  |  o o o d d d o o o o o o
1569 $  row 5  |  o o o d d d o o o o o o
1570 $         -------------------
1571 $
1572 
1573    Thus, any entries in the d locations are stored in the d (diagonal)
1574    submatrix, and any entries in the o locations are stored in the
1575    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1576    stored simply in the MATSEQAIJ format for compressed row storage.
1577 
1578    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1579    and o_nz should indicate the number of nonzeros per row in the o matrix.
1580    In general, for PDE problems in which most nonzeros are near the diagonal,
1581    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1582    or you will get TERRIBLE performance; see the users' manual chapter on
1583    matrices.
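
   For the three rows depicted above, for example, one could thus pass d_nz=3
   and o_nz=9, or equivalently d_nnz={3,3,3} and o_nnz={9,9,9}.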
1584 
1585 .keywords: matrix, aij, compressed row, sparse, parallel
1586 
1587 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1588 @*/
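/*
   A minimal calling sketch (not part of the library source; the global size,
   preallocation values, and program structure below are illustrative
   assumptions, and error handling is abbreviated):

       Mat A;
       int ierr;

       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                              3,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
       ... insert entries with MatSetValues(), using global row/column indices ...
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/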
1589 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1590                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1591 {
1592   Mat          B;
1593   Mat_MPIAIJ   *b;
1594   int          ierr, i,sum[2],work[2],size;
1595 
1596   PetscFunctionBegin;
1597   MPI_Comm_size(comm,&size);
1598   if (size == 1) {
1599     if (M == PETSC_DECIDE) M = m;
1600     if (N == PETSC_DECIDE) N = n;
1601     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1602     PetscFunctionReturn(0);
1603   }
1604 
1605   *A = 0;
1606   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1607   PLogObjectCreate(B);
1608   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1609   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1610   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1611   B->destroy    = MatDestroy_MPIAIJ;
1612   B->view       = MatView_MPIAIJ;
1613   B->factor     = 0;
1614   B->assembled  = PETSC_FALSE;
1615   B->mapping    = 0;
1616 
1617   B->insertmode = NOT_SET_VALUES;
1618   b->size       = size;
1619   MPI_Comm_rank(comm,&b->rank);
1620 
1621   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1622     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1623   }
1624 
1625   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1626     work[0] = m; work[1] = n;
1627     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1628     if (M == PETSC_DECIDE) M = sum[0];
1629     if (N == PETSC_DECIDE) N = sum[1];
1630   }
1631   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1632   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1633   b->m = m; B->m = m;
1634   b->n = n; B->n = n;
1635   b->N = N; B->N = N;
1636   b->M = M; B->M = M;
1637 
1638   /* build local table of row and column ownerships */
1639   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1640   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1641   b->cowners = b->rowners + b->size + 2;
1642   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1643   b->rowners[0] = 0;
1644   for ( i=2; i<=b->size; i++ ) {
1645     b->rowners[i] += b->rowners[i-1];
1646   }
1647   b->rstart = b->rowners[b->rank];
1648   b->rend   = b->rowners[b->rank+1];
1649   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1650   b->cowners[0] = 0;
1651   for ( i=2; i<=b->size; i++ ) {
1652     b->cowners[i] += b->cowners[i-1];
1653   }
1654   b->cstart = b->cowners[b->rank];
1655   b->cend   = b->cowners[b->rank+1];
1656 
1657   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1658   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1659   PLogObjectParent(B,b->A);
1660   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1661   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1662   PLogObjectParent(B,b->B);
1663 
1664   /* build cache (stash) for entries destined for rows owned by other processors */
1665   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1666   b->donotstash  = 0;
1667   b->colmap      = 0;
1668   b->garray      = 0;
1669   b->roworiented = 1;
1670 
1671   /* stuff used for matrix vector multiply */
1672   b->lvec      = 0;
1673   b->Mvctx     = 0;
1674 
1675   /* stuff for MatGetRow() */
1676   b->rowindices   = 0;
1677   b->rowvalues    = 0;
1678   b->getrowactive = PETSC_FALSE;
1679 
1680   *A = B;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNC__
1685 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1686 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1687 {
1688   Mat        mat;
1689   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1690   int        ierr, len=0, flg;
1691 
1692   PetscFunctionBegin;
1693   *newmat       = 0;
1694   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1695   PLogObjectCreate(mat);
1696   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1697   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1698   mat->destroy    = MatDestroy_MPIAIJ;
1699   mat->view       = MatView_MPIAIJ;
1700   mat->factor     = matin->factor;
1701   mat->assembled  = PETSC_TRUE;
1702 
1703   a->m = mat->m   = oldmat->m;
1704   a->n = mat->n   = oldmat->n;
1705   a->M = mat->M   = oldmat->M;
1706   a->N = mat->N   = oldmat->N;
1707 
1708   a->rstart       = oldmat->rstart;
1709   a->rend         = oldmat->rend;
1710   a->cstart       = oldmat->cstart;
1711   a->cend         = oldmat->cend;
1712   a->size         = oldmat->size;
1713   a->rank         = oldmat->rank;
1714   mat->insertmode = NOT_SET_VALUES;
1715   a->rowvalues    = 0;
1716   a->getrowactive = PETSC_FALSE;
1717 
1718   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1719   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1720   a->cowners = a->rowners + a->size + 2;
1721   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1722   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1723   if (oldmat->colmap) {
1724     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1725     PLogObjectMemory(mat,(a->N)*sizeof(int));
1726     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1727   } else a->colmap = 0;
1728   if (oldmat->garray) {
1729     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1730     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1731     PLogObjectMemory(mat,len*sizeof(int));
1732     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1733   } else a->garray = 0;
1734 
1735   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1736   PLogObjectParent(mat,a->lvec);
1737   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1738   PLogObjectParent(mat,a->Mvctx);
1739   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1740   PLogObjectParent(mat,a->A);
1741   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1742   PLogObjectParent(mat,a->B);
1743   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1744   if (flg) {
1745     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1746   }
1747   *newmat = mat;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 #include "sys.h"
1752 
1753 #undef __FUNC__
1754 #define __FUNC__ "MatLoad_MPIAIJ"
1755 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1756 {
1757   Mat          A;
1758   Scalar       *vals,*svals;
1759   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1760   MPI_Status   status;
1761   int          i, nz, ierr, j,rstart, rend, fd;
1762   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1763   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1764   int          tag = ((PetscObject)viewer)->tag;
1765 
1766   PetscFunctionBegin;
1767   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1768   if (!rank) {
1769     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1770     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1771     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"Not a matrix object");
1772     if (header[3] < 0) {
1773       SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ");
1774     }
1775   }
1776 
1777 
1778   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1779   M = header[1]; N = header[2];
1780   /* determine ownership of all rows */
1781   m = M/size + ((M % size) > rank);
1782   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1783   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1784   rowners[0] = 0;
1785   for ( i=2; i<=size; i++ ) {
1786     rowners[i] += rowners[i-1];
1787   }
1788   rstart = rowners[rank];
1789   rend   = rowners[rank+1];
1790 
1791   /* distribute row lengths to all processors */
1792   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1793   offlens = ourlens + (rend-rstart);
1794   if (!rank) {
1795     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1796     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1797     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1798     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1799     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1800     PetscFree(sndcounts);
1801   } else {
1802     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1803   }
1804 
1805   if (!rank) {
1806     /* calculate the number of nonzeros on each processor */
1807     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1808     PetscMemzero(procsnz,size*sizeof(int));
1809     for ( i=0; i<size; i++ ) {
1810       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1811         procsnz[i] += rowlengths[j];
1812       }
1813     }
1814     PetscFree(rowlengths);
1815 
1816     /* determine max buffer needed and allocate it */
1817     maxnz = 0;
1818     for ( i=0; i<size; i++ ) {
1819       maxnz = PetscMax(maxnz,procsnz[i]);
1820     }
1821     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1822 
1823     /* read in my part of the matrix column indices  */
1824     nz     = procsnz[0];
1825     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1826     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1827 
1828     /* read in everyone else's part and ship it off */
1829     for ( i=1; i<size; i++ ) {
1830       nz   = procsnz[i];
1831       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1832       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1833     }
1834     PetscFree(cols);
1835   } else {
1836     /* determine buffer space needed for message */
1837     nz = 0;
1838     for ( i=0; i<m; i++ ) {
1839       nz += ourlens[i];
1840     }
1841     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1842 
1843     /* receive message of column indices */
1844     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1845     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1846     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1847   }
1848 
1849   /* loop over local rows, determining number of off-diagonal entries */
1850   PetscMemzero(offlens,m*sizeof(int));
1851   jj = 0;
1852   for ( i=0; i<m; i++ ) {
1853     for ( j=0; j<ourlens[i]; j++ ) {
1854       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1855       jj++;
1856     }
1857   }
1858 
1859   /* create our matrix */
1860   for ( i=0; i<m; i++ ) {
1861     ourlens[i] -= offlens[i];
1862   }
1863   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1864   A = *newmat;
1865   MatSetOption(A,MAT_COLUMNS_SORTED);
1866   for ( i=0; i<m; i++ ) {
1867     ourlens[i] += offlens[i];
1868   }
1869 
1870   if (!rank) {
1871     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1872 
1873     /* read in my part of the matrix numerical values  */
1874     nz = procsnz[0];
1875     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1876 
1877     /* insert into matrix */
1878     jj      = rstart;
1879     smycols = mycols;
1880     svals   = vals;
1881     for ( i=0; i<m; i++ ) {
1882       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1883       smycols += ourlens[i];
1884       svals   += ourlens[i];
1885       jj++;
1886     }
1887 
1888     /* read in the other processors' parts and ship them out */
1889     for ( i=1; i<size; i++ ) {
1890       nz   = procsnz[i];
1891       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1892       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1893     }
1894     PetscFree(procsnz);
1895   } else {
1896     /* receive numeric values */
1897     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1898 
1899     /* receive message of values */
1900     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1901     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1902     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1903 
1904     /* insert into matrix */
1905     jj      = rstart;
1906     smycols = mycols;
1907     svals   = vals;
1908     for ( i=0; i<m; i++ ) {
1909       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1910       smycols += ourlens[i];
1911       svals   += ourlens[i];
1912       jj++;
1913     }
1914   }
1915   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1916 
1917   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1918   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNC__
1923 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1924 /*
1925     Not great since it makes two copies of the submatrix: first a SeqAIJ
1926   in *local, and then the end result by concatenating the local matrices.
1927   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1928 */
1929 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat)
1930 {
1931   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1932   Mat        *local,M;
1933   Scalar     *vwork,*aa;
1934   MPI_Comm   comm = mat->comm;
1935   Mat_SeqAIJ *aij;
1936   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1937 
1938   PetscFunctionBegin;
1939   MPI_Comm_rank(comm,&rank);
1940   MPI_Comm_size(comm,&size);
1941 
1942   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1943 
1944   /*
1945       m - number of local rows
1946       n - number of columns (same on all processors)
1947       rstart - first row in new global matrix generated
1948   */
1949   ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
1950   if (call == MAT_INITIAL_MATRIX) {
1951     aij = (Mat_SeqAIJ *) (*local)->data;
1952     if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
1953     ii  = aij->i;
1954     jj  = aij->j;
1955 
1956     /*
1957         Determine the number of non-zeros in the diagonal and off-diagonal
1958         portions of the matrix in order to do correct preallocation
1959     */
1960 
1961     /* first get start and end of "diagonal" columns */
1962     nlocal = n/size + ((n % size) > rank);
1963     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1964     rstart = rend - nlocal;
1965 
1966     /* next, compute all the lengths */
1967     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
1968     olens = dlens + m;
1969     for ( i=0; i<m; i++ ) {
1970       jend = ii[i+1] - ii[i];
1971       olen = 0;
1972       dlen = 0;
1973       for ( j=0; j<jend; j++ ) {
1974         if ( *jj < rstart || *jj >= rend) olen++;
1975         else dlen++;
1976         jj++;
1977       }
1978       olens[i] = olen;
1979       dlens[i] = dlen;
1980     }
1981     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1982     PetscFree(dlens);
1983   } else {
1984     int ml,nl;
1985 
1986     M = *newmat;
1987     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1988     if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request");
1989     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1990   }
1991   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
1992   aij = (Mat_SeqAIJ *) (*local)->data;
1993   if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
1994   ii  = aij->i;
1995   jj  = aij->j;
1996   aa  = aij->a;
1997   for (i=0; i<m; i++) {
1998     row   = rstart + i;
1999     nz    = ii[i+1] - ii[i];
2000     cwork = jj;     jj += nz;
2001     vwork = aa;     aa += nz;
2002     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2003   }
2004 
2005   ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
2006   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2007   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2008   *newmat = M;
2009   PetscFunctionReturn(0);
2010 }
2011