xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3a40ed3dce77c081171d005ae1a6ff4bb9d13b6f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.216 1997/09/26 02:19:06 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global number of columns.
14 */
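/*
   A small worked example (illustrative only): if the off-diagonal block has
   garray = {3,7,12}, the routine below sets
        colmap[3] = 1,  colmap[7] = 2,  colmap[12] = 3,
   and leaves every other entry zero.  A later lookup of global column c then
   uses  local = colmap[c] - 1;  a negative result means column c has no entry
   in the off-diagonal part owned by this process.
*/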
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   PetscFunctionBegin;
24   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
25   PLogObjectMemory(mat,aij->N*sizeof(int));
26   PetscMemzero(aij->colmap,aij->N*sizeof(int));
27   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
28   PetscFunctionReturn(0);
29 }
30 
31 extern int DisAssemble_MPIAIJ(Mat);
32 
33 #define CHUNKSIZE   15
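/*
   The two macros below insert a single (row,col,value) entry directly into the
   CSR arrays of the diagonal (A) or off-diagonal (B) sequential block: locate
   the column position within the row, add or overwrite the value, and if the
   row has no free slot reallocate the block's storage with room for CHUNKSIZE
   additional entries in that row.
*/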
34 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
35 { \
36  \
37     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
38     rmax = aimax[row]; nrow = ailen[row];  \
39     col1 = col - shift; \
40      \
41     low = 0; high = nrow; \
42     while (high-low > 5) { \
43       t = (low+high)/2; \
44       if (rp[t] > col1) high = t; \
45       else             low  = t; \
46     } \
47       for ( _i=low; _i<nrow; _i++ ) { \
48         if (rp[_i] > col1) break; \
49         if (rp[_i] == col1) { \
50           if (addv == ADD_VALUES) ap[_i] += value;   \
51           else                  ap[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (nonew == 1) goto a_noinsert; \
56       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
57       if (nrow >= rmax) { \
58         /* there is no extra room in row, therefore enlarge */ \
59         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
60         Scalar *new_a; \
61  \
62         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
63  \
64         /* malloc new storage space */ \
65         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
66         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
67         new_j   = (int *) (new_a + new_nz); \
68         new_i   = new_j + new_nz; \
69  \
70         /* copy over old data into new slots */ \
71         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
72         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
73         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
74         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
75         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
76                                                            len*sizeof(int)); \
77         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
78         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
79                                                            len*sizeof(Scalar));  \
80         /* free up old matrix storage */ \
81  \
82         PetscFree(a->a);  \
83         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
84         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
85         a->singlemalloc = 1; \
86  \
87         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
88         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
89         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
90         a->maxnz += CHUNKSIZE; \
91         a->reallocs++; \
92       } \
93       N = nrow++ - 1; a->nz++; \
94       /* shift up all the later entries in this row */ \
95       for ( ii=N; ii>=_i; ii-- ) { \
96         rp[ii+1] = rp[ii]; \
97         ap[ii+1] = ap[ii]; \
98       } \
99       rp[_i] = col1;  \
100       ap[_i] = value;  \
101       a_noinsert: ; \
102       ailen[row] = nrow; \
103 }
104 
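/* Same insertion logic as above, applied to the off-diagonal (B) block */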
105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
106 { \
107  \
108     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
109     rmax = bimax[row]; nrow = bilen[row];  \
110     col1 = col - shift; \
111      \
112     low = 0; high = nrow; \
113     while (high-low > 5) { \
114       t = (low+high)/2; \
115       if (rp[t] > col1) high = t; \
116       else             low  = t; \
117     } \
118       for ( _i=low; _i<nrow; _i++ ) { \
119         if (rp[_i] > col1) break; \
120         if (rp[_i] == col1) { \
121           if (addv == ADD_VALUES) ap[_i] += value;   \
122           else                  ap[_i] = value; \
123           goto b_noinsert; \
124         } \
125       }  \
126       if (nonew == 1) goto b_noinsert; \
127       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
128       if (nrow >= rmax) { \
129         /* there is no extra room in row, therefore enlarge */ \
130         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
131         Scalar *new_a; \
132  \
133         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
134  \
135         /* malloc new storage space */ \
136         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
137         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
138         new_j   = (int *) (new_a + new_nz); \
139         new_i   = new_j + new_nz; \
140  \
141         /* copy over old data into new slots */ \
142         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
143         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
144         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
145         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
146         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
147                                                            len*sizeof(int)); \
148         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
149         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
150                                                            len*sizeof(Scalar));  \
151         /* free up old matrix storage */ \
152  \
153         PetscFree(b->a);  \
154         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
155         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
156         b->singlemalloc = 1; \
157  \
158         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
159         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
160         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
161         b->maxnz += CHUNKSIZE; \
162         b->reallocs++; \
163       } \
164       N = nrow++ - 1; b->nz++; \
165       /* shift up all the later entries in this row */ \
166       for ( ii=N; ii>=_i; ii-- ) { \
167         rp[ii+1] = rp[ii]; \
168         ap[ii+1] = ap[ii]; \
169       } \
170       rp[_i] = col1;  \
171       ap[_i] = value;  \
172       b_noinsert: ; \
173       bilen[row] = nrow; \
174 }
175 
176 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
177 #undef __FUNC__
178 #define __FUNC__ "MatSetValues_MPIAIJ"
179 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
180 {
181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
182   Scalar     value;
183   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
184   int        cstart = aij->cstart, cend = aij->cend,row,col;
185   int        roworiented = aij->roworiented;
186 
187   /* Some variables required by the macros above */
188   Mat        A = aij->A;
189   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
190   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
191   Scalar     *aa = a->a;
192 
193   Mat        B = aij->B;
194   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
195   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
196   Scalar     *ba = b->a;
197 
198   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
199   int        nonew = a->nonew,shift = a->indexshift;
200   Scalar     *ap;
201 
202   PetscFunctionBegin;
203   for ( i=0; i<m; i++ ) {
204 #if defined(USE_PETSC_BOPT_g)
205     if (im[i] < 0) SETERRQ(1,0,"Negative row");
206     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
207 #endif
208     if (im[i] >= rstart && im[i] < rend) {
209       row = im[i] - rstart;
210       for ( j=0; j<n; j++ ) {
211         if (in[j] >= cstart && in[j] < cend){
212           col = in[j] - cstart;
213           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
214           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
215           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
216         }
217 #if defined(USE_PETSC_BOPT_g)
218         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
219         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
220 #endif
221         else {
222           if (mat->was_assembled) {
223             if (!aij->colmap) {
224               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
225             }
226             col = aij->colmap[in[j]] - 1;
227             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
228               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
229               col =  in[j];
230               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
231               B = aij->B;
232               b = (Mat_SeqAIJ *) B->data;
233               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
234               ba = b->a;
235             }
236           }
237           else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     }
294     else {
295       SETERRQ(1,0,"Only local values currently supported");
296     }
297   }
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   PetscFunctionBegin;
315   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
316   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
317   if (addv == (ADD_VALUES|INSERT_VALUES)) {
318     SETERRQ(1,0,"Some processors inserted others added");
319   }
320   mat->insertmode = addv; /* in case this processor had no cache */
321 
322   /*  first count number of contributors to each processor */
323   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
324   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
325   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
326   for ( i=0; i<aij->stash.n; i++ ) {
327     idx = aij->stash.idx[i];
328     for ( j=0; j<size; j++ ) {
329       if (idx >= owners[j] && idx < owners[j+1]) {
330         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
331       }
332     }
333   }
334   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
335 
336   /* inform other processors of number of messages and max length*/
337   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
338   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
339   nreceives = work[rank];
340   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
341   nmax = work[rank];
342   PetscFree(work);
343 
344   /* post receives:
345        1) each message will consist of ordered triples
346      (global row index, global column index, value); we store the indices
347      as Scalars (doubles) to simplify the message passing.
348        2) since we don't know how long each individual message is, we
349      allocate the largest needed buffer for each receive. Potentially
350      this is a lot of wasted space.
351 
352 
353        This could be done better.
354   */
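  /* A sketch of the packed message layout (this restates the pack loop below
     and the unpack loop in MatAssemblyEnd_MPIAIJ):
          svalues[3*k]   = (Scalar) global row index
          svalues[3*k+1] = (Scalar) global column index
          svalues[3*k+2] =          numerical value
     The receiver recovers the indices with (int) PetscReal(values[3*k]), etc. */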
355   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
356   CHKPTRQ(rvalues);
357   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
358   CHKPTRQ(recv_waits);
359   for ( i=0; i<nreceives; i++ ) {
360     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
361               comm,recv_waits+i);
362   }
363 
364   /* do sends:
365       1) starts[i] gives the starting index in svalues for stuff going to
366          the ith processor
367   */
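  /* starts[] doubles as a running cursor while packing: it is incremented only
     on the third assignment for each stashed entry, and is rebuilt below before
     the sends are posted */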
368   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
369   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
370   CHKPTRQ(send_waits);
371   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
372   starts[0] = 0;
373   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
374   for ( i=0; i<aij->stash.n; i++ ) {
375     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
376     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
377     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
378   }
379   PetscFree(owner);
380   starts[0] = 0;
381   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
382   count = 0;
383   for ( i=0; i<size; i++ ) {
384     if (procs[i]) {
385       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
386                 comm,send_waits+count++);
387     }
388   }
389   PetscFree(starts); PetscFree(nprocs);
390 
391   /* Free cache space */
392   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
393   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
394 
395   aij->svalues    = svalues;    aij->rvalues    = rvalues;
396   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
397   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
398   aij->rmax       = nmax;
399 
400   PetscFunctionReturn(0);
401 }
402 extern int MatSetUpMultiply_MPIAIJ(Mat);
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
406 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
407 {
408   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
409   MPI_Status  *send_status,recv_status;
410   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
411   int         row,col,other_disassembled;
412   Scalar      *values,val;
413   InsertMode  addv = mat->insertmode;
414 
415   PetscFunctionBegin;
416   /*  wait on receives */
417   while (count) {
418     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
419     /* unpack receives into our local space */
420     values = aij->rvalues + 3*imdex*aij->rmax;
421     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
422     n = n/3;
423     for ( i=0; i<n; i++ ) {
424       row = (int) PetscReal(values[3*i]) - aij->rstart;
425       col = (int) PetscReal(values[3*i+1]);
426       val = values[3*i+2];
427       if (col >= aij->cstart && col < aij->cend) {
428         col -= aij->cstart;
429         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
430       } else {
431         if (mat->was_assembled || mat->assembled) {
432           if (!aij->colmap) {
433             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
434           }
435           col = aij->colmap[col] - 1;
436           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
437             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
438             col = (int) PetscReal(values[3*i+1]);
439           }
440         }
441         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
442       }
443     }
444     count--;
445   }
446   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
447 
448   /* wait on sends */
449   if (aij->nsends) {
450     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
451     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
452     PetscFree(send_status);
453   }
454   PetscFree(aij->send_waits); PetscFree(aij->svalues);
455 
456   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
457   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
458 
459   /* determine if any processor has disassembled; if so, we must
460      also disassemble ourselves, in order that we may reassemble. */
461   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
462   if (mat->was_assembled && !other_disassembled) {
463     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
464   }
465 
466   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
467     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
468   }
469   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
470   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
471 
472   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatZeroEntries_MPIAIJ"
478 int MatZeroEntries_MPIAIJ(Mat A)
479 {
480   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
481   int        ierr;
482 
483   PetscFunctionBegin;
484   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
485   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 /* the code does not do the diagonal entries correctly unless the
490    matrix is square and the column and row ownerships are identical.
491    This is a BUG. The only way to fix it seems to be to access
492    aij->A and aij->B directly and not through the MatZeroRows()
493    routine.
494 */
495 #undef __FUNC__
496 #define __FUNC__ "MatZeroRows_MPIAIJ"
497 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
498 {
499   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
500   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
501   int            *procs,*nprocs,j,found,idx,nsends,*work;
502   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
503   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
504   int            *lens,imdex,*lrows,*values;
505   MPI_Comm       comm = A->comm;
506   MPI_Request    *send_waits,*recv_waits;
507   MPI_Status     recv_status,*send_status;
508   IS             istmp;
509 
510   PetscFunctionBegin;
511   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
512   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
513 
514   /*  first count number of contributors to each processor */
515   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
516   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
517   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
518   for ( i=0; i<N; i++ ) {
519     idx = rows[i];
520     found = 0;
521     for ( j=0; j<size; j++ ) {
522       if (idx >= owners[j] && idx < owners[j+1]) {
523         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
524       }
525     }
526     if (!found) SETERRQ(1,0,"Index out of range");
527   }
528   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
529 
530   /* inform other processors of number of messages and max length*/
531   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
532   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
533   nrecvs = work[rank];
534   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
535   nmax = work[rank];
536   PetscFree(work);
537 
538   /* post receives:   */
539   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
540   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
541   CHKPTRQ(recv_waits);
542   for ( i=0; i<nrecvs; i++ ) {
543     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
544   }
545 
546   /* do sends:
547       1) starts[i] gives the starting index in svalues for stuff going to
548          the ith processor
549   */
550   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
551   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
552   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
553   starts[0] = 0;
554   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
555   for ( i=0; i<N; i++ ) {
556     svalues[starts[owner[i]]++] = rows[i];
557   }
558   ISRestoreIndices(is,&rows);
559 
560   starts[0] = 0;
561   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
562   count = 0;
563   for ( i=0; i<size; i++ ) {
564     if (procs[i]) {
565       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
566     }
567   }
568   PetscFree(starts);
569 
570   base = owners[rank];
571 
572   /*  wait on receives */
573   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
574   source = lens + nrecvs;
575   count  = nrecvs; slen = 0;
576   while (count) {
577     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
578     /* unpack receives into our local space */
579     MPI_Get_count(&recv_status,MPI_INT,&n);
580     source[imdex]  = recv_status.MPI_SOURCE;
581     lens[imdex]  = n;
582     slen += n;
583     count--;
584   }
585   PetscFree(recv_waits);
586 
587   /* move the data into the send scatter */
588   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
589   count = 0;
590   for ( i=0; i<nrecvs; i++ ) {
591     values = rvalues + i*nmax;
592     for ( j=0; j<lens[i]; j++ ) {
593       lrows[count++] = values[j] - base;
594     }
595   }
596   PetscFree(rvalues); PetscFree(lens);
597   PetscFree(owner); PetscFree(nprocs);
598 
599   /* actually zap the local rows */
600   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
601   PLogObjectParent(A,istmp);
602   PetscFree(lrows);
603   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
604   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
605   ierr = ISDestroy(istmp); CHKERRQ(ierr);
606 
607   /* wait on sends */
608   if (nsends) {
609     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
610     CHKPTRQ(send_status);
611     MPI_Waitall(nsends,send_waits,send_status);
612     PetscFree(send_status);
613   }
614   PetscFree(send_waits); PetscFree(svalues);
615 
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNC__
620 #define __FUNC__ "MatMult_MPIAIJ"
621 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
622 {
623   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
624   int        ierr,nt;
625 
626   PetscFunctionBegin;
627   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
628   if (nt != a->n) {
629     SETERRQ(1,0,"Incompatible partition of A and xx");
630   }
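  /* A sketch of the algorithm implemented below: overlap the scatter of the
     needed off-processor entries of xx into a->lvec with the local product
         yy = (diagonal block A)*xx,
     then complete with
         yy = yy + (off-diagonal block B)*lvec  */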
631   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
632   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
633   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
634   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
635   PetscFunctionReturn(0);
636 }
637 
638 #undef __FUNC__
639 #define __FUNC__ "MatMultAdd_MPIAIJ"
640 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
641 {
642   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
643   int        ierr;
644 
645   PetscFunctionBegin;
646   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
647   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
648   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
649   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNC__
654 #define __FUNC__ "MatMultTrans_MPIAIJ"
655 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
656 {
657   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
658   int        ierr;
659 
660   PetscFunctionBegin;
661   /* do nondiagonal part */
662   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
663   /* send it on its way */
664   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
665   /* do local part */
666   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
667   /* receive remote parts: note this assumes the values are not actually */
668   /* inserted in yy until the next line, which is true for my implementation*/
669   /* but is not perhaps always true. */
670   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
676 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted in zz until the next line, which is true for my implementation*/
690   /* but is not perhaps always true. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNC__
700 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
701 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   int        ierr;
704   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
705 
706   PetscFunctionBegin;
707   if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(1,0,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNC__
716 #define __FUNC__ "MatScale_MPIAIJ"
717 int MatScale_MPIAIJ(Scalar *aa,Mat A)
718 {
719   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
720   int        ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNC__
729 #define __FUNC__ "MatDestroy_MPIAIJ"
730 int MatDestroy_MPIAIJ(PetscObject obj)
731 {
732   Mat        mat = (Mat) obj;
733   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
734   int        ierr;
735 
736   PetscFunctionBegin;
737 #if defined(USE_PETSC_LOG)
738   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
739 #endif
740   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
741   PetscFree(aij->rowners);
742   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
743   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
744   if (aij->colmap) PetscFree(aij->colmap);
745   if (aij->garray) PetscFree(aij->garray);
746   if (aij->lvec)   VecDestroy(aij->lvec);
747   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
748   if (aij->rowvalues) PetscFree(aij->rowvalues);
749   PetscFree(aij);
750   PLogObjectDestroy(mat);
751   PetscHeaderDestroy(mat);
752   PetscFunctionReturn(0);
753 }
754 
755 #undef __FUNC__
756 #define __FUNC__ "MatView_MPIAIJ_Binary"
757 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
758 {
759   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
760   int         ierr;
761 
762   PetscFunctionBegin;
763   if (aij->size == 1) {
764     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
765   }
766   else SETERRQ(1,0,"Only uniprocessor output supported");
767   PetscFunctionReturn(0);
768 }
769 
770 #undef __FUNC__
771 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
772 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
773 {
774   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
775   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
776   int         ierr, format,shift = C->indexshift,rank;
777   FILE        *fd;
778   ViewerType  vtype;
779 
780   PetscFunctionBegin;
781   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
782   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
783     ierr = ViewerGetFormat(viewer,&format);
784     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
785       MatInfo info;
786       int     flg;
787       MPI_Comm_rank(mat->comm,&rank);
788       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
789       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
790       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
791       PetscSequentialPhaseBegin(mat->comm,1);
792       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
793          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
794       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
795          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
796       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
797       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
798       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
799       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
800       fflush(fd);
801       PetscSequentialPhaseEnd(mat->comm,1);
802       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
803       PetscFunctionReturn(0);
804     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (vtype == DRAW_VIEWER) {
810     Draw       draw;
811     PetscTruth isnull;
812     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
813     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (vtype == ASCII_FILE_VIEWER) {
817     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
818     PetscSequentialPhaseBegin(mat->comm,1);
819     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
820            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
821            aij->cend);
822     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
823     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
824     fflush(fd);
825     PetscSequentialPhaseEnd(mat->comm,1);
826   } else {
827     int size = aij->size;
828     rank = aij->rank;
829     if (size == 1) {
830       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
831     } else {
832       /* assemble the entire matrix onto first processor. */
833       Mat         A;
834       Mat_SeqAIJ *Aloc;
835       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
836       Scalar      *a;
837 
838       if (!rank) {
839         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
840                CHKERRQ(ierr);
841       } else {
842         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
843                CHKERRQ(ierr);
844       }
845       PLogObjectParent(mat,A);
846 
847       /* copy over the A part */
848       Aloc = (Mat_SeqAIJ*) aij->A->data;
849       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
850       row = aij->rstart;
851       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
852       for ( i=0; i<m; i++ ) {
853         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
854         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
855       }
856       aj = Aloc->j;
857       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
858 
859       /* copy over the B part */
860       Aloc = (Mat_SeqAIJ*) aij->B->data;
861       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
862       row = aij->rstart;
863       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
864       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
865       for ( i=0; i<m; i++ ) {
866         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
867         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
868       }
869       PetscFree(ct);
870       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
871       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
872       if (!rank) {
873         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
874       }
875       ierr = MatDestroy(A); CHKERRQ(ierr);
876     }
877   }
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNC__
882 #define __FUNC__ "MatView_MPIAIJ"
883 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
884 {
885   Mat         mat = (Mat) obj;
886   int         ierr;
887   ViewerType  vtype;
888 
889   PetscFunctionBegin;
890   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
891   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
892       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
893     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
894   }
895   else if (vtype == BINARY_FILE_VIEWER) {
896     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 /*
902     This has to provide several versions.
903 
904      2) a) use only local smoothing, updating outer values only once.
905         b) local smoothing, updating outer values each inner iteration.
906      3) coloring: updating outer values between colors.
907 */
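/*
   A sketch of the pointwise update performed in each sweep below (reading
   SPARSEDENSEMDOT(sum,x,v,idx,n) as sum -= v[k]*x[idx[k]] summed over the row):

       x[i] <- (1-omega)*x[i] + omega*(b[i] - sum_{j!=i} a_ij*x[j])/(a_ii + fshift)

   where the off-diagonal sum is split between the local (A) part, which uses x,
   and the off-processor (B) part, which uses the scattered ghost values in lvec.
*/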
908 #undef __FUNC__
909 #define __FUNC__ "MatRelax_MPIAIJ"
910 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
911                            double fshift,int its,Vec xx)
912 {
913   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
914   Mat        AA = mat->A, BB = mat->B;
915   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
916   Scalar     *b,*x,*xs,*ls,d,*v,sum;
917   int        ierr,*idx, *diag;
918   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
919 
920   PetscFunctionBegin;
921   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
922   xs = x + shift; /* shift by one for index start of 1 */
923   ls = ls + shift;
924   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
925   diag = A->diag;
926   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
927     if (flag & SOR_ZERO_INITIAL_GUESS) {
928       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
929       PetscFunctionReturn(0);
930     }
931     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
932     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
933     while (its--) {
934       /* go down through the rows */
935       for ( i=0; i<m; i++ ) {
936         n    = A->i[i+1] - A->i[i];
937         idx  = A->j + A->i[i] + shift;
938         v    = A->a + A->i[i] + shift;
939         sum  = b[i];
940         SPARSEDENSEMDOT(sum,xs,v,idx,n);
941         d    = fshift + A->a[diag[i]+shift];
942         n    = B->i[i+1] - B->i[i];
943         idx  = B->j + B->i[i] + shift;
944         v    = B->a + B->i[i] + shift;
945         SPARSEDENSEMDOT(sum,ls,v,idx,n);
946         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
947       }
948       /* come up through the rows */
949       for ( i=m-1; i>-1; i-- ) {
950         n    = A->i[i+1] - A->i[i];
951         idx  = A->j + A->i[i] + shift;
952         v    = A->a + A->i[i] + shift;
953         sum  = b[i];
954         SPARSEDENSEMDOT(sum,xs,v,idx,n);
955         d    = fshift + A->a[diag[i]+shift];
956         n    = B->i[i+1] - B->i[i];
957         idx  = B->j + B->i[i] + shift;
958         v    = B->a + B->i[i] + shift;
959         SPARSEDENSEMDOT(sum,ls,v,idx,n);
960         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
961       }
962     }
963   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
964     if (flag & SOR_ZERO_INITIAL_GUESS) {
965       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
966       PetscFunctionReturn(0);
967     }
968     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
969     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
970     while (its--) {
971       for ( i=0; i<m; i++ ) {
972         n    = A->i[i+1] - A->i[i];
973         idx  = A->j + A->i[i] + shift;
974         v    = A->a + A->i[i] + shift;
975         sum  = b[i];
976         SPARSEDENSEMDOT(sum,xs,v,idx,n);
977         d    = fshift + A->a[diag[i]+shift];
978         n    = B->i[i+1] - B->i[i];
979         idx  = B->j + B->i[i] + shift;
980         v    = B->a + B->i[i] + shift;
981         SPARSEDENSEMDOT(sum,ls,v,idx,n);
982         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
983       }
984     }
985   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
986     if (flag & SOR_ZERO_INITIAL_GUESS) {
987       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
988       PetscFunctionReturn(0);
989     }
990     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
991                             mat->Mvctx); CHKERRQ(ierr);
992     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
993                             mat->Mvctx); CHKERRQ(ierr);
994     while (its--) {
995       for ( i=m-1; i>-1; i-- ) {
996         n    = A->i[i+1] - A->i[i];
997         idx  = A->j + A->i[i] + shift;
998         v    = A->a + A->i[i] + shift;
999         sum  = b[i];
1000         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1001         d    = fshift + A->a[diag[i]+shift];
1002         n    = B->i[i+1] - B->i[i];
1003         idx  = B->j + B->i[i] + shift;
1004         v    = B->a + B->i[i] + shift;
1005         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1006         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1007       }
1008     }
1009   } else {
1010     SETERRQ(1,0,"Option not supported");
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNC__
1016 #define __FUNC__ "MatGetInfo_MPIAIJ"
1017 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1018 {
1019   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1020   Mat        A = mat->A, B = mat->B;
1021   int        ierr;
1022   double     isend[5], irecv[5];
1023 
1024   PetscFunctionBegin;
1025   info->rows_global    = (double)mat->M;
1026   info->columns_global = (double)mat->N;
1027   info->rows_local     = (double)mat->m;
1028   info->columns_local  = (double)mat->N;
1029   info->block_size     = 1.0;
1030   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1031   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1032   isend[3] = info->memory;  isend[4] = info->mallocs;
1033   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1034   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1035   isend[3] += info->memory;  isend[4] += info->mallocs;
1036   if (flag == MAT_LOCAL) {
1037     info->nz_used      = isend[0];
1038     info->nz_allocated = isend[1];
1039     info->nz_unneeded  = isend[2];
1040     info->memory       = isend[3];
1041     info->mallocs      = isend[4];
1042   } else if (flag == MAT_GLOBAL_MAX) {
1043     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1044     info->nz_used      = irecv[0];
1045     info->nz_allocated = irecv[1];
1046     info->nz_unneeded  = irecv[2];
1047     info->memory       = irecv[3];
1048     info->mallocs      = irecv[4];
1049   } else if (flag == MAT_GLOBAL_SUM) {
1050     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1051     info->nz_used      = irecv[0];
1052     info->nz_allocated = irecv[1];
1053     info->nz_unneeded  = irecv[2];
1054     info->memory       = irecv[3];
1055     info->mallocs      = irecv[4];
1056   }
1057   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1058   info->fill_ratio_needed = 0;
1059   info->factor_mallocs    = 0;
1060 
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNC__
1065 #define __FUNC__ "MatSetOption_MPIAIJ"
1066 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1067 {
1068   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1069 
1070   PetscFunctionBegin;
1071   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1072       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1073       op == MAT_COLUMNS_UNSORTED ||
1074       op == MAT_COLUMNS_SORTED ||
1075       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1076       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1077         MatSetOption(a->A,op);
1078         MatSetOption(a->B,op);
1079   } else if (op == MAT_ROW_ORIENTED) {
1080     a->roworiented = 1;
1081     MatSetOption(a->A,op);
1082     MatSetOption(a->B,op);
1083   } else if (op == MAT_ROWS_SORTED ||
1084              op == MAT_ROWS_UNSORTED ||
1085              op == MAT_SYMMETRIC ||
1086              op == MAT_STRUCTURALLY_SYMMETRIC ||
1087              op == MAT_YES_NEW_DIAGONALS)
1088     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1089   else if (op == MAT_COLUMN_ORIENTED) {
1090     a->roworiented = 0;
1091     MatSetOption(a->A,op);
1092     MatSetOption(a->B,op);
1093   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1094     a->donotstash = 1;
1095   } else if (op == MAT_NO_NEW_DIAGONALS){
1096     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1097   } else {
1098     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNC__
1104 #define __FUNC__ "MatGetSize_MPIAIJ"
1105 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1106 {
1107   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1108 
1109   PetscFunctionBegin;
1110   if (m) *m = mat->M;
1111   if (n) *n = mat->N;
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNC__
1116 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1117 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1118 {
1119   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1120 
1121   PetscFunctionBegin;
1122   if (m) *m = mat->m;
1123   if (n) *n = mat->N;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNC__
1128 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1129 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1130 {
1131   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1132 
1133   PetscFunctionBegin;
1134   *m = mat->rstart; *n = mat->rend;
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1139 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatGetRow_MPIAIJ"
1143 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1144 {
1145   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1146   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1147   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1148   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1149   int        *cmap, *idx_p;
1150 
1151   PetscFunctionBegin;
1152   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1153   mat->getrowactive = PETSC_TRUE;
1154 
1155   if (!mat->rowvalues && (idx || v)) {
1156     /*
1157         allocate enough space to hold information from the longest row.
1158     */
1159     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1160     int     max = 1,m = mat->m,tmp;
1161     for ( i=0; i<m; i++ ) {
1162       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1163       if (max < tmp) { max = tmp; }
1164     }
1165     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1166     CHKPTRQ(mat->rowvalues);
1167     mat->rowindices = (int *) (mat->rowvalues + max);
1168   }
1169 
1170   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1171   lrow = row - rstart;
1172 
1173   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1174   if (!v)   {pvA = 0; pvB = 0;}
1175   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1176   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1177   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1178   nztot = nzA + nzB;
1179 
1180   cmap  = mat->garray;
1181   if (v  || idx) {
1182     if (nztot) {
1183       /* Sort by increasing column numbers, assuming A and B already sorted */
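      /* the merged row is assembled in three pieces: first the B (off-diagonal)
         columns whose global indices fall before cstart, then all A (diagonal)
         columns shifted by cstart, then the remaining B columns, whose global
         indices fall at or beyond cend */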
1184       int imark = -1;
1185       if (v) {
1186         *v = v_p = mat->rowvalues;
1187         for ( i=0; i<nzB; i++ ) {
1188           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1189           else break;
1190         }
1191         imark = i;
1192         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1193         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1194       }
1195       if (idx) {
1196         *idx = idx_p = mat->rowindices;
1197         if (imark > -1) {
1198           for ( i=0; i<imark; i++ ) {
1199             idx_p[i] = cmap[cworkB[i]];
1200           }
1201         } else {
1202           for ( i=0; i<nzB; i++ ) {
1203             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1204             else break;
1205           }
1206           imark = i;
1207         }
1208         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1209         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1210       }
1211     }
1212     else {
1213       if (idx) *idx = 0;
1214       if (v)   *v   = 0;
1215     }
1216   }
1217   *nz = nztot;
1218   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1219   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 #undef __FUNC__
1224 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1225 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1226 {
1227   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1228 
1229   PetscFunctionBegin;
1230   if (aij->getrowactive == PETSC_FALSE) {
1231     SETERRQ(1,0,"MatGetRow not called");
1232   }
1233   aij->getrowactive = PETSC_FALSE;
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNC__
1238 #define __FUNC__ "MatNorm_MPIAIJ"
1239 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1240 {
1241   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1242   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1243   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1244   double     sum = 0.0;
1245   Scalar     *v;
1246 
1247   PetscFunctionBegin;
1248   if (aij->size == 1) {
1249     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1250   } else {
1251     if (type == NORM_FROBENIUS) {
1252       v = amat->a;
1253       for (i=0; i<amat->nz; i++ ) {
1254 #if defined(USE_PETSC_COMPLEX)
1255         sum += real(conj(*v)*(*v)); v++;
1256 #else
1257         sum += (*v)*(*v); v++;
1258 #endif
1259       }
1260       v = bmat->a;
1261       for (i=0; i<bmat->nz; i++ ) {
1262 #if defined(USE_PETSC_COMPLEX)
1263         sum += real(conj(*v)*(*v)); v++;
1264 #else
1265         sum += (*v)*(*v); v++;
1266 #endif
1267       }
1268       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1269       *norm = sqrt(*norm);
1270     } else if (type == NORM_1) { /* max column norm */
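      /* accumulate |a_ij| into per-column sums of global length N, add the
         contributions from all processes, then take the largest column sum;
         like colmap, this uses an array of the global column length */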
1271       double *tmp, *tmp2;
1272       int    *jj, *garray = aij->garray;
1273       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1274       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1275       PetscMemzero(tmp,aij->N*sizeof(double));
1276       *norm = 0.0;
1277       v = amat->a; jj = amat->j;
1278       for ( j=0; j<amat->nz; j++ ) {
1279         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1280       }
1281       v = bmat->a; jj = bmat->j;
1282       for ( j=0; j<bmat->nz; j++ ) {
1283         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1284       }
1285       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1286       for ( j=0; j<aij->N; j++ ) {
1287         if (tmp2[j] > *norm) *norm = tmp2[j];
1288       }
1289       PetscFree(tmp); PetscFree(tmp2);
1290     } else if (type == NORM_INFINITY) { /* max row norm */
1291       double ntemp = 0.0;
1292       for ( j=0; j<amat->m; j++ ) {
1293         v = amat->a + amat->i[j] + shift;
1294         sum = 0.0;
1295         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1296           sum += PetscAbsScalar(*v); v++;
1297         }
1298         v = bmat->a + bmat->i[j] + shift;
1299         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1300           sum += PetscAbsScalar(*v); v++;
1301         }
1302         if (sum > ntemp) ntemp = sum;
1303       }
1304       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1305     }
1306     else {
1307       SETERRQ(1,0,"No support for two norm");
1308     }
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNC__
1314 #define __FUNC__ "MatTranspose_MPIAIJ"
1315 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1316 {
1317   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1318   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1319   int        ierr,shift = Aloc->indexshift;
1320   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1321   Mat        B;
1322   Scalar     *array;
1323 
1324   PetscFunctionBegin;
1325   if (matout == PETSC_NULL && M != N) {
1326     SETERRQ(1,0,"Square matrix only for in-place");
1327   }
1328 
1329   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1330 
1331   /* copy over the A part */
1332   Aloc = (Mat_SeqAIJ*) a->A->data;
1333   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1334   row = a->rstart;
1335   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1336   for ( i=0; i<m; i++ ) {
1337     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1338     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1339   }
1340   aj = Aloc->j;
1341   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1342 
1343   /* copy over the B part */
1344   Aloc = (Mat_SeqAIJ*) a->B->data;
1345   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1346   row = a->rstart;
1347   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1348   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1349   for ( i=0; i<m; i++ ) {
1350     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1351     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1352   }
1353   PetscFree(ct);
1354   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1355   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1356   if (matout != PETSC_NULL) {
1357     *matout = B;
1358   } else {
1359     /* This isn't really an in-place transpose, but we free the data structures from a */
1360     PetscFree(a->rowners);
1361     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1362     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1363     if (a->colmap) PetscFree(a->colmap);
1364     if (a->garray) PetscFree(a->garray);
1365     if (a->lvec) VecDestroy(a->lvec);
1366     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1367     PetscFree(a);
1368     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1369     PetscHeaderDestroy(B);
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNC__
1375 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1376 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1377 {
1378   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1379   Mat a = aij->A, b = aij->B;
1380   int ierr,s1,s2,s3;
1381 
1382   PetscFunctionBegin;
1383   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1384   if (rr) {
1385     s3 = aij->n;
1386     VecGetLocalSize_Fast(rr,s1);
1387     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1388     /* Overlap communication with computation. */
1389     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1390   }
1391   if (ll) {
1392     VecGetLocalSize_Fast(ll,s1);
1393     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1394     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1395   }
1396   /* scale  the diagonal block */
1397   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1398 
1399   if (rr) {
1400     /* Do a scatter end and then right scale the off-diagonal block */
1401     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1402     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1403   }
1404 
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 
1409 extern int MatPrintHelp_SeqAIJ(Mat);
1410 #undef __FUNC__
1411 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1412 int MatPrintHelp_MPIAIJ(Mat A)
1413 {
1414   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1415   int        ierr;
1416 
1417   PetscFunctionBegin;
1418   if (!a->rank) {
1419     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNC__
1425 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1426 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1427 {
1428   PetscFunctionBegin;
1429   *bs = 1;
1430   PetscFunctionReturn(0);
1431 }
1432 #undef __FUNC__
1433 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1434 int MatSetUnfactored_MPIAIJ(Mat A)
1435 {
1436   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1437   int        ierr;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNC__
1445 #define __FUNC__ "MatEqual_MPIAIJ"
1446 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1447 {
1448   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1449   Mat        a, b, c, d;
1450   PetscTruth flg;
1451   int        ierr;
1452 
1453   PetscFunctionBegin;
1454   if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
1455   a = matA->A; b = matA->B;
1456   c = matB->A; d = matB->B;
1457 
1458   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1459   if (flg == PETSC_TRUE) {
1460     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1461   }
1462   MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1467 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1468 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1469 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1470 /* -------------------------------------------------------------------*/
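/* Function table for the MPIAIJ matrix type; a zero entry means the corresponding
   operation is not provided by this format. */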
1471 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1472        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1473        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1474        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1475        0,0,
1476        0,0,
1477        0,0,
1478        MatRelax_MPIAIJ,
1479        MatTranspose_MPIAIJ,
1480        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1481        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1482        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1483        0,
1484        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1485        0,0,0,0,
1486        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1487        0,0,
1488        0,0,MatConvertSameType_MPIAIJ,0,0,
1489        0,0,0,
1490        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1491        MatPrintHelp_MPIAIJ,
1492        MatScale_MPIAIJ,0,0,0,
1493        MatGetBlockSize_MPIAIJ,0,0,0,0,
1494        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1495 
1496 
1497 #undef __FUNC__
1498 #define __FUNC__ "MatCreateMPIAIJ"
1499 /*@C
1500    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1501    (the default parallel PETSc format).  For good matrix assembly performance
1502    the user should preallocate the matrix storage by setting the parameters
1503    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1504    performance can be increased by more than a factor of 50.
1505 
1506    Input Parameters:
1507 .  comm - MPI communicator
1508 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1509            This value should be the same as the local size used in creating the
1510            y vector for the matrix-vector product y = Ax.
1511 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1512        This value should be the same as the local size used in creating the
1513        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1514 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1515 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1516 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1517            (same for all local rows)
1518 .  d_nnz - array containing the number of nonzeros in the various rows of the
1519            diagonal portion of the local submatrix (possibly different for each row)
1520            or PETSC_NULL. You must leave room for the diagonal entry even if
1521            it is zero.
1522 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1523            submatrix (same for all local rows).
1524 .  o_nnz - array containing the number of nonzeros in the various rows of the
1525            off-diagonal portion of the local submatrix (possibly different for
1526            each row) or PETSC_NULL.
1527 
1528    Output Parameter:
1529 .  A - the matrix
1530 
1531    Notes:
1532    The AIJ format (also called the Yale sparse matrix format or
1533    compressed row storage) is fully compatible with standard Fortran 77
1534    storage.  That is, the stored row and column indices can begin at
1535    either one (as in Fortran) or zero.  See the users manual for details.
1536 
1537    The user MUST specify either the local or global matrix dimensions
1538    (possibly both).
1539 
1540    By default, this format uses inodes (identical nodes) when possible.
1541    We search for consecutive rows with the same nonzero structure, thereby
1542    reusing matrix information to achieve increased efficiency.
1543 
1544    Options Database Keys:
1545 $    -mat_aij_no_inode  - Do not use inodes
1546 $    -mat_aij_inode_limit <limit> - Set inode limit.
1547 $        (max limit=5)
1548 $    -mat_aij_oneindex - Internally use indexing starting at 1
1549 $        rather than 0.  Note: When calling MatSetValues(),
1550 $        the user still MUST index entries starting at 0!
1551 
1552    Storage Information:
1553    For a square global matrix we define each processor's diagonal portion
1554    to be its local rows and the corresponding columns (a square submatrix);
1555    each processor's off-diagonal portion encompasses the remainder of the
1556    local matrix (a rectangular submatrix).
1557 
1558    The user can specify preallocated storage for the diagonal part of
1559    the local submatrix with either d_nz or d_nnz (not both).  Set
1560    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1561    memory allocation.  Likewise, specify preallocated storage for the
1562    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1563 
1564    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1565    the figure below we depict these three local rows and all columns (0-11).
1566 
1567 $          0 1 2 3 4 5 6 7 8 9 10 11
1568 $         -------------------
1569 $  row 3  |  o o o d d d o o o o o o
1570 $  row 4  |  o o o d d d o o o o o o
1571 $  row 5  |  o o o d d d o o o o o o
1572 $         -------------------
1573 $
1574 
1575    Thus, any entries in the d locations are stored in the d (diagonal)
1576    submatrix, and any entries in the o locations are stored in the
1577    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1578    stored simply in the MATSEQAIJ format for compressed row storage.
1579 
1580    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1581    and o_nz should indicate the number of nonzeros per row in the o matrix.
1582    In general, for PDE problems in which most nonzeros are near the diagonal,
1583    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1584    or you will get TERRIBLE performance; see the users' manual chapter on
1585    matrices.
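
   Example (a minimal sketch, not taken from the PETSc distribution; the
   preallocation counts 5 and 2 are purely illustrative):

$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$      /* ... insert entries with MatSetValues(), then assemble and use A ... */
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatDestroy(A); CHKERRQ(ierr);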
1586 
1587 .keywords: matrix, aij, compressed row, sparse, parallel
1588 
1589 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1590 @*/
1591 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1592                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1593 {
1594   Mat          B;
1595   Mat_MPIAIJ   *b;
1596   int          ierr, i,sum[2],work[2],size;
1597 
1598   PetscFunctionBegin;
1599   MPI_Comm_size(comm,&size);
1600   if (size == 1) {
1601     if (M == PETSC_DECIDE) M = m;
1602     if (N == PETSC_DECIDE) N = n;
1603     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1604     PetscFunctionReturn(0);
1605   }
1606 
1607   *A = 0;
1608   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1609   PLogObjectCreate(B);
1610   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1611   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1612   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1613   B->destroy    = MatDestroy_MPIAIJ;
1614   B->view       = MatView_MPIAIJ;
1615   B->factor     = 0;
1616   B->assembled  = PETSC_FALSE;
1617   B->mapping    = 0;
1618 
1619   B->insertmode = NOT_SET_VALUES;
1620   b->size       = size;
1621   MPI_Comm_rank(comm,&b->rank);
1622 
1623   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1624     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1625   }
1626 
1627   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1628     work[0] = m; work[1] = n;
1629     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1630     if (M == PETSC_DECIDE) M = sum[0];
1631     if (N == PETSC_DECIDE) N = sum[1];
1632   }
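  /* split the rows and columns as evenly as possible: each process gets M/size
     (N/size) of them, and the first M%size (N%size) processes take one extra */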
1633   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1634   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1635   b->m = m; B->m = m;
1636   b->n = n; B->n = n;
1637   b->N = N; B->N = N;
1638   b->M = M; B->M = M;
1639 
1640   /* build local table of row and column ownerships */
1641   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1642   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1643   b->cowners = b->rowners + b->size + 2;
1644   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1645   b->rowners[0] = 0;
1646   for ( i=2; i<=b->size; i++ ) {
1647     b->rowners[i] += b->rowners[i-1];
1648   }
1649   b->rstart = b->rowners[b->rank];
1650   b->rend   = b->rowners[b->rank+1];
1651   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1652   b->cowners[0] = 0;
1653   for ( i=2; i<=b->size; i++ ) {
1654     b->cowners[i] += b->cowners[i-1];
1655   }
1656   b->cstart = b->cowners[b->rank];
1657   b->cend   = b->cowners[b->rank+1];
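  /* after the prefix sums, rowners[i] and cowners[i] give the global index of the
     first row and column owned by process i, so [rstart,rend) and [cstart,cend)
     are this process's ownership ranges */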
1658 
1659   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1660   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1661   PLogObjectParent(B,b->A);
1662   if (o_nz == PETSC_DEFAULT) o_nz = 0;
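  /* the off-diagonal block B is created with the full global column count N; its
     columns are renumbered to a compact local numbering during assembly (see the
     colmap/garray machinery) */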
1663   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1664   PLogObjectParent(B,b->B);
1665 
1666   /* build cache for off array entries formed */
1667   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1668   b->donotstash  = 0;
1669   b->colmap      = 0;
1670   b->garray      = 0;
1671   b->roworiented = 1;
1672 
1673   /* stuff used for matrix vector multiply */
1674   b->lvec      = 0;
1675   b->Mvctx     = 0;
1676 
1677   /* stuff for MatGetRow() */
1678   b->rowindices   = 0;
1679   b->rowvalues    = 0;
1680   b->getrowactive = PETSC_FALSE;
1681 
1682   *A = B;
1683   PetscFunctionReturn(0);
1684 }
1685 
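/*
   MatConvertSameType_MPIAIJ - duplicates a parallel AIJ matrix: the ownership
   tables, column map, and global column array are copied, the work vector and
   scatter context are duplicated, and the diagonal and off-diagonal sequential
   blocks are copied with MatConvert(...,MATSAME,...).
*/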
1686 #undef __FUNC__
1687 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1688 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1689 {
1690   Mat        mat;
1691   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1692   int        ierr, len=0, flg;
1693 
1694   PetscFunctionBegin;
1695   *newmat       = 0;
1696   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1697   PLogObjectCreate(mat);
1698   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1699   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1700   mat->destroy    = MatDestroy_MPIAIJ;
1701   mat->view       = MatView_MPIAIJ;
1702   mat->factor     = matin->factor;
1703   mat->assembled  = PETSC_TRUE;
1704 
1705   a->m = mat->m   = oldmat->m;
1706   a->n = mat->n   = oldmat->n;
1707   a->M = mat->M   = oldmat->M;
1708   a->N = mat->N   = oldmat->N;
1709 
1710   a->rstart       = oldmat->rstart;
1711   a->rend         = oldmat->rend;
1712   a->cstart       = oldmat->cstart;
1713   a->cend         = oldmat->cend;
1714   a->size         = oldmat->size;
1715   a->rank         = oldmat->rank;
1716   mat->insertmode = NOT_SET_VALUES;
1717   a->rowvalues    = 0;
1718   a->getrowactive = PETSC_FALSE;
1719 
1720   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1721   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1722   a->cowners = a->rowners + a->size + 2;
1723   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1724   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1725   if (oldmat->colmap) {
1726     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1727     PLogObjectMemory(mat,(a->N)*sizeof(int));
1728     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1729   } else a->colmap = 0;
1730   if (oldmat->garray) {
1731     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1732     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1733     PLogObjectMemory(mat,len*sizeof(int));
1734     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1735   } else a->garray = 0;
1736 
1737   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1738   PLogObjectParent(mat,a->lvec);
1739   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1740   PLogObjectParent(mat,a->Mvctx);
1741   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1742   PLogObjectParent(mat,a->A);
1743   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1744   PLogObjectParent(mat,a->B);
1745   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1746   if (flg) {
1747     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1748   }
1749   *newmat = mat;
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #include "sys.h"
1754 
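/*
   MatLoad_MPIAIJ - loads a matrix from a PETSc binary viewer.  Process 0 reads
   the header and the row lengths, scatters each process its own row lengths,
   then reads the column indices and the numerical values one process's worth
   at a time and ships them to their owners.  Each process first counts its
   off-diagonal entries so the new matrix can be preallocated exactly before
   the values are inserted with MatSetValues().
*/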
1755 #undef __FUNC__
1756 #define __FUNC__ "MatLoad_MPIAIJ"
1757 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1758 {
1759   Mat          A;
1760   Scalar       *vals,*svals;
1761   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1762   MPI_Status   status;
1763   int          i, nz, ierr, j,rstart, rend, fd;
1764   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1765   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1766   int          tag = ((PetscObject)viewer)->tag;
1767 
1768   PetscFunctionBegin;
1769   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1770   if (!rank) {
1771     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1772     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1773     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1774   }
1775 
1776   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1777   M = header[1]; N = header[2];
1778   /* determine ownership of all rows */
1779   m = M/size + ((M % size) > rank);
1780   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1781   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1782   rowners[0] = 0;
1783   for ( i=2; i<=size; i++ ) {
1784     rowners[i] += rowners[i-1];
1785   }
1786   rstart = rowners[rank];
1787   rend   = rowners[rank+1];
1788 
1789   /* distribute row lengths to all processors */
1790   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1791   offlens = ourlens + (rend-rstart);
1792   if (!rank) {
1793     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1794     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1795     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1796     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1797     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1798     PetscFree(sndcounts);
1799   } else {
1800     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1801   }
1802 
1803   if (!rank) {
1804     /* calculate the number of nonzeros on each processor */
1805     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1806     PetscMemzero(procsnz,size*sizeof(int));
1807     for ( i=0; i<size; i++ ) {
1808       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1809         procsnz[i] += rowlengths[j];
1810       }
1811     }
1812     PetscFree(rowlengths);
1813 
1814     /* determine max buffer needed and allocate it */
1815     maxnz = 0;
1816     for ( i=0; i<size; i++ ) {
1817       maxnz = PetscMax(maxnz,procsnz[i]);
1818     }
1819     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1820 
1821     /* read in my part of the matrix column indices  */
1822     nz = procsnz[0];
1823     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1824     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1825 
1826     /* read in everyone else's part and ship it off */
1827     for ( i=1; i<size; i++ ) {
1828       nz = procsnz[i];
1829       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1830       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1831     }
1832     PetscFree(cols);
1833   } else {
1834     /* determine buffer space needed for message */
1835     nz = 0;
1836     for ( i=0; i<m; i++ ) {
1837       nz += ourlens[i];
1838     }
1839     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1840 
1841     /* receive message of column indices */
1842     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1843     MPI_Get_count(&status,MPI_INT,&maxnz);
1844     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1845   }
1846 
1847   /* loop over local rows, determining number of off diagonal entries */
1848   PetscMemzero(offlens,m*sizeof(int));
1849   jj = 0;
1850   for ( i=0; i<m; i++ ) {
1851     for ( j=0; j<ourlens[i]; j++ ) {
1852       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1853       jj++;
1854     }
1855   }
1856 
1857   /* create our matrix, preallocated with the diagonal counts (ourlens - offlens) and the off-diagonal counts (offlens) */
1858   for ( i=0; i<m; i++ ) {
1859     ourlens[i] -= offlens[i];
1860   }
1861   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1862   A = *newmat;
1863   MatSetOption(A,MAT_COLUMNS_SORTED);
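  /* restore ourlens to the full row lengths; below they are reused as the number
     of values inserted per row with MatSetValues() */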
1864   for ( i=0; i<m; i++ ) {
1865     ourlens[i] += offlens[i];
1866   }
1867 
1868   if (!rank) {
1869     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1870 
1871     /* read in my part of the matrix numerical values  */
1872     nz = procsnz[0];
1873     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1874 
1875     /* insert into matrix */
1876     jj      = rstart;
1877     smycols = mycols;
1878     svals   = vals;
1879     for ( i=0; i<m; i++ ) {
1880       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1881       smycols += ourlens[i];
1882       svals   += ourlens[i];
1883       jj++;
1884     }
1885 
1886     /* read in other processors and ship out */
1887     for ( i=1; i<size; i++ ) {
1888       nz = procsnz[i];
1889       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1890       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1891     }
1892     PetscFree(procsnz);
1893   } else {
1894     /* receive numeric values */
1895     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1896 
1897     /* receive message of values */
1898     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1899     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1900     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1901 
1902     /* insert into matrix */
1903     jj      = rstart;
1904     smycols = mycols;
1905     svals   = vals;
1906     for ( i=0; i<m; i++ ) {
1907       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1908       smycols += ourlens[i];
1909       svals   += ourlens[i];
1910       jj++;
1911     }
1912   }
1913   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1914 
1915   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1916   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919