xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 95b207ea01a3fbf8432ea0e0186aba3bf9cd0565)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.209 1997/07/09 20:54:04 balay Exp balay $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global matrix length.
14 */
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
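
/*
   Illustrative sketch (not part of the library code): once colmap has been
   built, a global column number gcol is translated into a local column index
   of the off-diagonal part B with

       lcol = aij->colmap[gcol] - 1;

   where lcol < 0 means that gcol does not (yet) appear in the off-diagonal
   part; see the uses in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
*/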
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = aimax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66     low = 0; high = nrow; \
67     while (high-low > 5) { \
68       t = (low+high)/2; \
69       if (rp[t] > col) high = t; \
70       else             low  = t; \
71     } \
72       for ( _i=0; _i<nrow; _i++ ) { \
73         if (rp[_i] > col1) break; \
74         if (rp[_i] == col1) { \
75           if (addv == ADD_VALUES) ap[_i] += value;   \
76           else                  ap[_i] = value; \
77           goto a_noinsert; \
78         } \
79       }  \
80       if (nonew == 1) goto a_noinsert; \
81       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
82       if (nrow >= rmax) { \
83         /* there is no extra room in row, therefore enlarge */ \
84         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
85         Scalar *new_a; \
86  \
87         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
88  \
89         /* malloc new storage space */ \
90         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
91         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
92         new_j   = (int *) (new_a + new_nz); \
93         new_i   = new_j + new_nz; \
94  \
95         /* copy over old data into new slots */ \
96         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
97         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
98         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
99         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
100         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
101                                                            len*sizeof(int)); \
102         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
103         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
104                                                            len*sizeof(Scalar));  \
105         /* free up old matrix storage */ \
106  \
107         PetscFree(a->a);  \
108         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
109         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
110         a->singlemalloc = 1; \
111  \
112         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
113         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
114         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
115         a->maxnz += CHUNKSIZE; \
116         a->reallocs++; \
117       } \
118       N = nrow++ - 1; a->nz++; \
119       /* shift up all the later entries in this row */ \
120       for ( ii=N; ii>=_i; ii-- ) { \
121         rp[ii+1] = rp[ii]; \
122         ap[ii+1] = ap[ii]; \
123       } \
124       rp[_i] = col1;  \
125       ap[_i] = value;  \
126       a_noinsert: ; \
127       ailen[row] = nrow; \
128 }
129 
130 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
131 { \
132  \
133     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
134     rmax = bimax[row]; nrow = bilen[row];  \
135     col1 = col - shift; \
136      \
137     low = 0; high = nrow; \
138     while (high-low > 5) { \
139       t = (low+high)/2; \
140       if (rp[t] > col) high = t; \
141       else             low  = t; \
142     } \
143        for ( _i=0; _i<nrow; _i++ ) { \
144         if (rp[_i] > col1) break; \
145         if (rp[_i] == col1) { \
146           if (addv == ADD_VALUES) ap[_i] += value;   \
147           else                  ap[_i] = value; \
148           goto b_noinsert; \
149         } \
150       }  \
151       if (nonew == 1) goto b_noinsert; \
152       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
153       if (nrow >= rmax) { \
154         /* there is no extra room in row, therefore enlarge */ \
155         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
156         Scalar *new_a; \
157  \
158         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
159  \
160         /* malloc new storage space */ \
161         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
162         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
163         new_j   = (int *) (new_a + new_nz); \
164         new_i   = new_j + new_nz; \
165  \
166         /* copy over old data into new slots */ \
167         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
168         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
169         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
170         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
171         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
172                                                            len*sizeof(int)); \
173         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
174         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
175                                                            len*sizeof(Scalar));  \
176         /* free up old matrix storage */ \
177  \
178         PetscFree(b->a);  \
179         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
180         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
181         b->singlemalloc = 1; \
182  \
183         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
184         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
185         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
186         b->maxnz += CHUNKSIZE; \
187         b->reallocs++; \
188       } \
189       N = nrow++ - 1; b->nz++; \
190       /* shift up all the later entries in this row */ \
191       for ( ii=N; ii>=_i; ii-- ) { \
192         rp[ii+1] = rp[ii]; \
193         ap[ii+1] = ap[ii]; \
194       } \
195       rp[_i] = col1;  \
196       ap[_i] = value;  \
197       b_noinsert: ; \
198       bilen[row] = nrow; \
199 }
200 
201 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
202 #undef __FUNC__
203 #define __FUNC__ "MatSetValues_MPIAIJ"
204 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
205 {
206   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
207   Scalar     value;
208   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
209   int        cstart = aij->cstart, cend = aij->cend,row,col;
210   int        roworiented = aij->roworiented;
211 
212   /* Some variables required by the MatSetValues_SeqAIJ_*_Private macros */
213   Mat        A = aij->A;
214   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
215   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
216   Scalar     *aa = a->a;
217 
218   Mat        B = aij->B;
219   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
220   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
221   Scalar     *ba = b->a;
222 
223   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
224   int        nonew = a->nonew,shift = a->indexshift;
225   Scalar     *ap;
226 
227   for ( i=0; i<m; i++ ) {
228 #if defined(PETSC_BOPT_g)
229     if (im[i] < 0) SETERRQ(1,0,"Negative row");
230     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
231 #endif
232     if (im[i] >= rstart && im[i] < rend) {
233       row = im[i] - rstart;
234       for ( j=0; j<n; j++ ) {
235         if (in[j] >= cstart && in[j] < cend){
236           col = in[j] - cstart;
237           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
238           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
239           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
240         }
241 #if defined(PETSC_BOPT_g)
242         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
243         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
244 #endif
245         else {
246           if (mat->was_assembled) {
247             if (!aij->colmap) {
248               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
249             }
250             col = aij->colmap[in[j]] - 1;
251             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
252               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
253               col =  in[j];
254               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
255               B = aij->B;
256               b = (Mat_SeqAIJ *) B->data;
257               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
258               ba = b->a;
259             }
260           }
261           else col = in[j];
262           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
263           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
264           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
265         }
266       }
267     }
268     else {
269       if (roworiented && !aij->donotstash) {
270         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
271       }
272       else {
273         if (!aij->donotstash) {
274           row = im[i];
275           for ( j=0; j<n; j++ ) {
276             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
277           }
278         }
279       }
280     }
281   }
282   return 0;
283 }
284 
285 #undef __FUNC__
286 #define __FUNC__ "MatGetValues_MPIAIJ"
287 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
290   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
291   int        cstart = aij->cstart, cend = aij->cend,row,col;
292 
293   for ( i=0; i<m; i++ ) {
294     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
295     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
296     if (idxm[i] >= rstart && idxm[i] < rend) {
297       row = idxm[i] - rstart;
298       for ( j=0; j<n; j++ ) {
299         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
300         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
301         if (idxn[j] >= cstart && idxn[j] < cend){
302           col = idxn[j] - cstart;
303           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
304         }
305         else {
306           if (!aij->colmap) {
307             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
308           }
309           col = aij->colmap[idxn[j]] - 1;
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
313           }
314         }
315       }
316     }
317     else {
318       SETERRQ(1,0,"Only local values currently supported");
319     }
320   }
321   return 0;
322 }
323 
324 #undef __FUNC__
325 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
326 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
327 {
328   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
329   MPI_Comm    comm = mat->comm;
330   int         size = aij->size, *owners = aij->rowners;
331   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
332   MPI_Request *send_waits,*recv_waits;
333   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
334   InsertMode  addv;
335   Scalar      *rvalues,*svalues;
336 
337   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
338   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
339   if (addv == (ADD_VALUES|INSERT_VALUES)) {
340     SETERRQ(1,0,"Some processors inserted others added");
341   }
342   mat->insertmode = addv; /* in case this processor had no cache */
343 
344   /*  first count number of contributors to each processor */
345   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
346   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
347   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
348   for ( i=0; i<aij->stash.n; i++ ) {
349     idx = aij->stash.idx[i];
350     for ( j=0; j<size; j++ ) {
351       if (idx >= owners[j] && idx < owners[j+1]) {
352         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
353       }
354     }
355   }
356   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
357 
358   /* inform other processors of number of messages and max length*/
359   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
360   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
361   nreceives = work[rank];
362   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
363   nmax = work[rank];
364   PetscFree(work);
365 
366   /* post receives:
367        1) each message will consist of ordered triples
368      (global row index, global column index, value); the indices are stored
369      as Scalars to simplify the message passing.
370        2) since we don't know how long each individual message is, we
371      allocate the largest needed buffer for each receive. Potentially
372      this is a lot of wasted space.
373 
374 
375        This could be done better.
376   */
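  /*
     Illustrative sketch (not part of the library code): each stashed entry
     destined for some processor is packed as three consecutive Scalars,

         svalues[3*s  ] = (Scalar) global row index
         svalues[3*s+1] = (Scalar) global column index
         svalues[3*s+2] = value

     where s is that processor's running offset into the triples; the data are
     unpacked the same way in MatAssemblyEnd_MPIAIJ() below.
  */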
377   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
378   CHKPTRQ(rvalues);
379   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
380   CHKPTRQ(recv_waits);
381   for ( i=0; i<nreceives; i++ ) {
382     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
383               comm,recv_waits+i);
384   }
385 
386   /* do sends:
387       1) starts[i] gives the starting index in svalues for stuff going to
388          the ith processor
389   */
390   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
391   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
392   CHKPTRQ(send_waits);
393   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
394   starts[0] = 0;
395   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
396   for ( i=0; i<aij->stash.n; i++ ) {
397     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
398     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
399     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
400   }
401   PetscFree(owner);
402   starts[0] = 0;
403   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
404   count = 0;
405   for ( i=0; i<size; i++ ) {
406     if (procs[i]) {
407       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
408                 comm,send_waits+count++);
409     }
410   }
411   PetscFree(starts); PetscFree(nprocs);
412 
413   /* Free cache space */
414   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
415   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
416 
417   aij->svalues    = svalues;    aij->rvalues    = rvalues;
418   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
419   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
420   aij->rmax       = nmax;
421 
422   return 0;
423 }
424 extern int MatSetUpMultiply_MPIAIJ(Mat);
425 
426 #undef __FUNC__
427 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
428 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
429 {
430   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
431   MPI_Status  *send_status,recv_status;
432   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
433   int         row,col,other_disassembled;
434   Scalar      *values,val;
435   InsertMode  addv = mat->insertmode;
436 
437   /*  wait on receives */
438   while (count) {
439     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
440     /* unpack receives into our local space */
441     values = aij->rvalues + 3*imdex*aij->rmax;
442     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
443     n = n/3;
444     for ( i=0; i<n; i++ ) {
445       row = (int) PetscReal(values[3*i]) - aij->rstart;
446       col = (int) PetscReal(values[3*i+1]);
447       val = values[3*i+2];
448       if (col >= aij->cstart && col < aij->cend) {
449         col -= aij->cstart;
450         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
451       }
452       else {
453         if (mat->was_assembled || mat->assembled) {
454           if (!aij->colmap) {
455             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
456           }
457           col = aij->colmap[col] - 1;
458           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
459             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460             col = (int) PetscReal(values[3*i+1]);
461           }
462         }
463         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
464       }
465     }
466     count--;
467   }
468   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
469 
470   /* wait on sends */
471   if (aij->nsends) {
472     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
473     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
474     PetscFree(send_status);
475   }
476   PetscFree(aij->send_waits); PetscFree(aij->svalues);
477 
478   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
479   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
480 
481   /* determine if any processor has disassembled; if so, we must
482      also disassemble ourselves, in order that we may reassemble. */
483   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
484   if (mat->was_assembled && !other_disassembled) {
485     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
486   }
487 
488   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
489     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
490   }
491   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
492   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
493 
494   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
495   return 0;
496 }
497 
498 #undef __FUNC__
499 #define __FUNC__ "MatZeroEntries_MPIAIJ"
500 int MatZeroEntries_MPIAIJ(Mat A)
501 {
502   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
503   int        ierr;
504   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
505   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
506   return 0;
507 }
508 
509 /* the code does not handle the diagonal entries correctly unless the
510    matrix is square and the column and row ownerships are identical.
511    This is a BUG. The only way to fix it seems to be to access
512    aij->A and aij->B directly and not through the MatZeroRows()
513    routine.
514 */
515 #undef __FUNC__
516 #define __FUNC__ "MatZeroRows_MPIAIJ"
517 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
518 {
519   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
520   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
521   int            *procs,*nprocs,j,found,idx,nsends,*work;
522   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
523   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
524   int            *lens,imdex,*lrows,*values;
525   MPI_Comm       comm = A->comm;
526   MPI_Request    *send_waits,*recv_waits;
527   MPI_Status     recv_status,*send_status;
528   IS             istmp;
529 
530   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
531   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
532 
533   /*  first count number of contributors to each processor */
534   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
535   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
536   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
537   for ( i=0; i<N; i++ ) {
538     idx = rows[i];
539     found = 0;
540     for ( j=0; j<size; j++ ) {
541       if (idx >= owners[j] && idx < owners[j+1]) {
542         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
543       }
544     }
545     if (!found) SETERRQ(1,0,"Index out of range");
546   }
547   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
548 
549   /* inform other processors of number of messages and max length*/
550   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
551   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
552   nrecvs = work[rank];
553   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
554   nmax = work[rank];
555   PetscFree(work);
556 
557   /* post receives:   */
558   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
559   CHKPTRQ(rvalues);
560   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
561   CHKPTRQ(recv_waits);
562   for ( i=0; i<nrecvs; i++ ) {
563     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
564   }
565 
566   /* do sends:
567       1) starts[i] gives the starting index in svalues for stuff going to
568          the ith processor
569   */
570   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
571   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
572   CHKPTRQ(send_waits);
573   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
574   starts[0] = 0;
575   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
576   for ( i=0; i<N; i++ ) {
577     svalues[starts[owner[i]]++] = rows[i];
578   }
579   ISRestoreIndices(is,&rows);
580 
581   starts[0] = 0;
582   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
583   count = 0;
584   for ( i=0; i<size; i++ ) {
585     if (procs[i]) {
586       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
587     }
588   }
589   PetscFree(starts);
590 
591   base = owners[rank];
592 
593   /*  wait on receives */
594   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
595   source = lens + nrecvs;
596   count  = nrecvs; slen = 0;
597   while (count) {
598     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
599     /* unpack receives into our local space */
600     MPI_Get_count(&recv_status,MPI_INT,&n);
601     source[imdex]  = recv_status.MPI_SOURCE;
602     lens[imdex]  = n;
603     slen += n;
604     count--;
605   }
606   PetscFree(recv_waits);
607 
608   /* move the data into the send scatter */
609   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
610   count = 0;
611   for ( i=0; i<nrecvs; i++ ) {
612     values = rvalues + i*nmax;
613     for ( j=0; j<lens[i]; j++ ) {
614       lrows[count++] = values[j] - base;
615     }
616   }
617   PetscFree(rvalues); PetscFree(lens);
618   PetscFree(owner); PetscFree(nprocs);
619 
620   /* actually zap the local rows */
621   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
622   PLogObjectParent(A,istmp);
623   PetscFree(lrows);
624   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
625   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
626   ierr = ISDestroy(istmp); CHKERRQ(ierr);
627 
628   /* wait on sends */
629   if (nsends) {
630     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
631     CHKPTRQ(send_status);
632     MPI_Waitall(nsends,send_waits,send_status);
633     PetscFree(send_status);
634   }
635   PetscFree(send_waits); PetscFree(svalues);
636 
637   return 0;
638 }
639 
640 #undef __FUNC__
641 #define __FUNC__ "MatMult_MPIAIJ"
642 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
645   int        ierr,nt;
646 
647   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
648   if (nt != a->n) {
649     SETERRQ(1,0,"Incompatible partition of A and xx");
650   }
651   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
652   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
653   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
654   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
655   return 0;
656 }
657 
658 #undef __FUNC__
659 #define __FUNC__ "MatMultAdd_MPIAIJ"
660 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
661 {
662   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
663   int        ierr;
664   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
665   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
666   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
667   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
668   return 0;
669 }
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatMultTrans_MPIAIJ"
673 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
674 {
675   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676   int        ierr;
677 
678   /* do nondiagonal part */
679   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
680   /* send it on its way */
681   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682   /* do local part */
683   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
684   /* receive remote parts: note this assumes the values are not actually */
685   /* inserted in yy until the next line, which is true for my implementation */
686   /* but may not always be true. */
687   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
688   return 0;
689 }
690 
691 #undef __FUNC__
692 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
693 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
694 {
695   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696   int        ierr;
697 
698   /* do nondiagonal part */
699   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
700   /* send it on its way */
701   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
702   /* do local part */
703   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
704   /* receive remote parts: note this assumes the values are not actually */
705   /* inserted in zz until the next line, which is true for my implementation */
706   /* but may not always be true. */
707   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
708   return 0;
709 }
710 
711 /*
712   This only works correctly for square matrices where the subblock A->A is the
713    diagonal block
714 */
715 #undef __FUNC__
716 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
717 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
718 {
719   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
720   if (a->M != a->N)
721     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
722   if (a->rstart != a->cstart || a->rend != a->cend) {
723     SETERRQ(1,0,"row partition must equal col partition");  }
724   return MatGetDiagonal(a->A,v);
725 }
726 
727 #undef __FUNC__
728 #define __FUNC__ "MatScale_MPIAIJ"
729 int MatScale_MPIAIJ(Scalar *aa,Mat A)
730 {
731   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
732   int        ierr;
733   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
734   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
735   return 0;
736 }
737 
738 #undef __FUNC__
739 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
740 int MatDestroy_MPIAIJ(PetscObject obj)
741 {
742   Mat        mat = (Mat) obj;
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
744   int        ierr;
745 #if defined(PETSC_LOG)
746   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
747 #endif
748   PetscFree(aij->rowners);
749   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
750   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
751   if (aij->colmap) PetscFree(aij->colmap);
752   if (aij->garray) PetscFree(aij->garray);
753   if (aij->lvec)   VecDestroy(aij->lvec);
754   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
755   if (aij->rowvalues) PetscFree(aij->rowvalues);
756   PetscFree(aij);
757   if (mat->mapping) {
758     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
759   }
760   PLogObjectDestroy(mat);
761   PetscHeaderDestroy(mat);
762   return 0;
763 }
764 
765 #undef __FUNC__
766 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
767 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
768 {
769   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
770   int         ierr;
771 
772   if (aij->size == 1) {
773     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
774   }
775   else SETERRQ(1,0,"Only uniprocessor output supported");
776   return 0;
777 }
778 
779 #undef __FUNC__
780 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
781 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
782 {
783   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
784   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
785   int         ierr, format,shift = C->indexshift,rank;
786   FILE        *fd;
787   ViewerType  vtype;
788 
789   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
790   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
791     ierr = ViewerGetFormat(viewer,&format);
792     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
793       MatInfo info;
794       int     flg;
795       MPI_Comm_rank(mat->comm,&rank);
796       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
797       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
798       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
799       PetscSequentialPhaseBegin(mat->comm,1);
800       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
801          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
802       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
803          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
804       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
805       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
806       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
807       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
808       fflush(fd);
809       PetscSequentialPhaseEnd(mat->comm,1);
810       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
811       return 0;
812     }
813     else if (format == VIEWER_FORMAT_ASCII_INFO) {
814       return 0;
815     }
816   }
817 
818   if (vtype == DRAW_VIEWER) {
819     Draw       draw;
820     PetscTruth isnull;
821     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
822     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
823   }
824 
825   if (vtype == ASCII_FILE_VIEWER) {
826     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
827     PetscSequentialPhaseBegin(mat->comm,1);
828     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
829            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
830            aij->cend);
831     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
832     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
833     fflush(fd);
834     PetscSequentialPhaseEnd(mat->comm,1);
835   }
836   else {
837     int size = aij->size;
838     rank = aij->rank;
839     if (size == 1) {
840       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
841     }
842     else {
843       /* assemble the entire matrix onto first processor. */
844       Mat         A;
845       Mat_SeqAIJ *Aloc;
846       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
847       Scalar      *a;
848 
849       if (!rank) {
850         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
851                CHKERRQ(ierr);
852       }
853       else {
854         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
855                CHKERRQ(ierr);
856       }
857       PLogObjectParent(mat,A);
858 
859       /* copy over the A part */
860       Aloc = (Mat_SeqAIJ*) aij->A->data;
861       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
862       row = aij->rstart;
863       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
864       for ( i=0; i<m; i++ ) {
865         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
866         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
867       }
868       aj = Aloc->j;
869       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
870 
871       /* copy over the B part */
872       Aloc = (Mat_SeqAIJ*) aij->B->data;
873       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
874       row = aij->rstart;
875       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
876       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
877       for ( i=0; i<m; i++ ) {
878         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
879         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
880       }
881       PetscFree(ct);
882       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
883       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
884       if (!rank) {
885         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
886       }
887       ierr = MatDestroy(A); CHKERRQ(ierr);
888     }
889   }
890   return 0;
891 }
892 
893 #undef __FUNC__
894 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
895 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
896 {
897   Mat         mat = (Mat) obj;
898   int         ierr;
899   ViewerType  vtype;
900 
901   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
902   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
903       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
904     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
905   }
906   else if (vtype == BINARY_FILE_VIEWER) {
907     return MatView_MPIAIJ_Binary(mat,viewer);
908   }
909   return 0;
910 }
911 
912 /*
913     This has to provide several versions.
914 
915      2) a) use only local smoothing, updating outer values only once.
916         b) local smoothing, updating outer values each inner iteration.
917      3) coloring, updating outer values between colors.
918 */
919 #undef __FUNC__
920 #define __FUNC__ "MatRelax_MPIAIJ"
921 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
922                            double fshift,int its,Vec xx)
923 {
924   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
925   Mat        AA = mat->A, BB = mat->B;
926   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
927   Scalar     *b,*x,*xs,*ls,d,*v,sum;
928   int        ierr,*idx, *diag;
929   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
930 
931   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
932   xs = x + shift; /* shift by one for index start of 1 */
933   ls = ls + shift;
934   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
935   diag = A->diag;
936   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
937     if (flag & SOR_ZERO_INITIAL_GUESS) {
938       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
939     }
940     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
941     CHKERRQ(ierr);
942     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
943     CHKERRQ(ierr);
944     while (its--) {
945       /* go down through the rows */
946       for ( i=0; i<m; i++ ) {
947         n    = A->i[i+1] - A->i[i];
948         idx  = A->j + A->i[i] + shift;
949         v    = A->a + A->i[i] + shift;
950         sum  = b[i];
951         SPARSEDENSEMDOT(sum,xs,v,idx,n);
952         d    = fshift + A->a[diag[i]+shift];
953         n    = B->i[i+1] - B->i[i];
954         idx  = B->j + B->i[i] + shift;
955         v    = B->a + B->i[i] + shift;
956         SPARSEDENSEMDOT(sum,ls,v,idx,n);
957         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
958       }
959       /* come up through the rows */
960       for ( i=m-1; i>-1; i-- ) {
961         n    = A->i[i+1] - A->i[i];
962         idx  = A->j + A->i[i] + shift;
963         v    = A->a + A->i[i] + shift;
964         sum  = b[i];
965         SPARSEDENSEMDOT(sum,xs,v,idx,n);
966         d    = fshift + A->a[diag[i]+shift];
967         n    = B->i[i+1] - B->i[i];
968         idx  = B->j + B->i[i] + shift;
969         v    = B->a + B->i[i] + shift;
970         SPARSEDENSEMDOT(sum,ls,v,idx,n);
971         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
972       }
973     }
974   }
975   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
976     if (flag & SOR_ZERO_INITIAL_GUESS) {
977       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
978     }
979     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
980     CHKERRQ(ierr);
981     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
982     CHKERRQ(ierr);
983     while (its--) {
984       for ( i=0; i<m; i++ ) {
985         n    = A->i[i+1] - A->i[i];
986         idx  = A->j + A->i[i] + shift;
987         v    = A->a + A->i[i] + shift;
988         sum  = b[i];
989         SPARSEDENSEMDOT(sum,xs,v,idx,n);
990         d    = fshift + A->a[diag[i]+shift];
991         n    = B->i[i+1] - B->i[i];
992         idx  = B->j + B->i[i] + shift;
993         v    = B->a + B->i[i] + shift;
994         SPARSEDENSEMDOT(sum,ls,v,idx,n);
995         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
996       }
997     }
998   }
999   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1000     if (flag & SOR_ZERO_INITIAL_GUESS) {
1001       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
1002     }
1003     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1004                             mat->Mvctx); CHKERRQ(ierr);
1005     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1006                             mat->Mvctx); CHKERRQ(ierr);
1007     while (its--) {
1008       for ( i=m-1; i>-1; i-- ) {
1009         n    = A->i[i+1] - A->i[i];
1010         idx  = A->j + A->i[i] + shift;
1011         v    = A->a + A->i[i] + shift;
1012         sum  = b[i];
1013         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1014         d    = fshift + A->a[diag[i]+shift];
1015         n    = B->i[i+1] - B->i[i];
1016         idx  = B->j + B->i[i] + shift;
1017         v    = B->a + B->i[i] + shift;
1018         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1019         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1020       }
1021     }
1022   }
1023   else {
1024     SETERRQ(1,0,"Option not supported");
1025   }
1026   return 0;
1027 }
1028 
1029 #undef __FUNC__
1030 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1031 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1032 {
1033   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1034   Mat        A = mat->A, B = mat->B;
1035   int        ierr;
1036   double     isend[5], irecv[5];
1037 
1038   info->rows_global    = (double)mat->M;
1039   info->columns_global = (double)mat->N;
1040   info->rows_local     = (double)mat->m;
1041   info->columns_local  = (double)mat->N;
1042   info->block_size     = 1.0;
1043   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1044   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1045   isend[3] = info->memory;  isend[4] = info->mallocs;
1046   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1047   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1048   isend[3] += info->memory;  isend[4] += info->mallocs;
1049   if (flag == MAT_LOCAL) {
1050     info->nz_used      = isend[0];
1051     info->nz_allocated = isend[1];
1052     info->nz_unneeded  = isend[2];
1053     info->memory       = isend[3];
1054     info->mallocs      = isend[4];
1055   } else if (flag == MAT_GLOBAL_MAX) {
1056     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1057     info->nz_used      = irecv[0];
1058     info->nz_allocated = irecv[1];
1059     info->nz_unneeded  = irecv[2];
1060     info->memory       = irecv[3];
1061     info->mallocs      = irecv[4];
1062   } else if (flag == MAT_GLOBAL_SUM) {
1063     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1064     info->nz_used      = irecv[0];
1065     info->nz_allocated = irecv[1];
1066     info->nz_unneeded  = irecv[2];
1067     info->memory       = irecv[3];
1068     info->mallocs      = irecv[4];
1069   }
1070   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1071   info->fill_ratio_needed = 0;
1072   info->factor_mallocs    = 0;
1073 
1074   return 0;
1075 }
1076 
1077 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1078 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1079 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1080 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1081 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1082 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1083 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1084 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1085 
1086 #undef __FUNC__
1087 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1088 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1089 {
1090   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1091 
1092   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1093       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1094       op == MAT_COLUMNS_UNSORTED ||
1095       op == MAT_COLUMNS_SORTED ||
1096       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1097       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1098         MatSetOption(a->A,op);
1099         MatSetOption(a->B,op);
1100   } else if (op == MAT_ROW_ORIENTED) {
1101     a->roworiented = 1;
1102     MatSetOption(a->A,op);
1103     MatSetOption(a->B,op);
1104   } else if (op == MAT_ROWS_SORTED ||
1105              op == MAT_ROWS_UNSORTED ||
1106              op == MAT_SYMMETRIC ||
1107              op == MAT_STRUCTURALLY_SYMMETRIC ||
1108              op == MAT_YES_NEW_DIAGONALS)
1109     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1110   else if (op == MAT_COLUMN_ORIENTED) {
1111     a->roworiented = 0;
1112     MatSetOption(a->A,op);
1113     MatSetOption(a->B,op);
1114   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1115     a->donotstash = 1;
1116   } else if (op == MAT_NO_NEW_DIAGONALS)
1117     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1118   else
1119     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1120   return 0;
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1125 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1126 {
1127   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1128   *m = mat->M; *n = mat->N;
1129   return 0;
1130 }
1131 
1132 #undef __FUNC__
1133 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1134 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1135 {
1136   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1137   *m = mat->m; *n = mat->N;
1138   return 0;
1139 }
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1143 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1144 {
1145   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1146   *m = mat->rstart; *n = mat->rend;
1147   return 0;
1148 }
1149 
1150 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1151 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1152 
1153 #undef __FUNC__
1154 #define __FUNC__ "MatGetRow_MPIAIJ"
1155 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1156 {
1157   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1158   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1159   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1160   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1161   int        *cmap, *idx_p;
1162 
1163   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1164   mat->getrowactive = PETSC_TRUE;
1165 
1166   if (!mat->rowvalues && (idx || v)) {
1167     /*
1168         allocate enough space to hold information from the longest row.
1169     */
1170     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1171     int     max = 1,m = mat->m,tmp;
1172     for ( i=0; i<m; i++ ) {
1173       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1174       if (max < tmp) { max = tmp; }
1175     }
1176     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1177     CHKPTRQ(mat->rowvalues);
1178     mat->rowindices = (int *) (mat->rowvalues + max);
1179   }
1180 
1181   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1182   lrow = row - rstart;
1183 
1184   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1185   if (!v)   {pvA = 0; pvB = 0;}
1186   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1187   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1188   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1189   nztot = nzA + nzB;
1190 
1191   cmap  = mat->garray;
1192   if (v  || idx) {
1193     if (nztot) {
1194       /* Sort by increasing column numbers, assuming A and B already sorted */
1195       int imark = -1;
1196       if (v) {
1197         *v = v_p = mat->rowvalues;
1198         for ( i=0; i<nzB; i++ ) {
1199           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1200           else break;
1201         }
1202         imark = i;
1203         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1204         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1205       }
1206       if (idx) {
1207         *idx = idx_p = mat->rowindices;
1208         if (imark > -1) {
1209           for ( i=0; i<imark; i++ ) {
1210             idx_p[i] = cmap[cworkB[i]];
1211           }
1212         } else {
1213           for ( i=0; i<nzB; i++ ) {
1214             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1215             else break;
1216           }
1217           imark = i;
1218         }
1219         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1220         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1221       }
1222     }
1223     else {
1224       if (idx) *idx = 0;
1225       if (v)   *v   = 0;
1226     }
1227   }
1228   *nz = nztot;
1229   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1230   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1231   return 0;
1232 }
1233 
1234 #undef __FUNC__
1235 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1236 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1237 {
1238   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1239   if (aij->getrowactive == PETSC_FALSE) {
1240     SETERRQ(1,0,"MatGetRow not called");
1241   }
1242   aij->getrowactive = PETSC_FALSE;
1243   return 0;
1244 }
1245 
1246 #undef __FUNC__
1247 #define __FUNC__ "MatNorm_MPIAIJ"
1248 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1249 {
1250   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1251   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1252   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1253   double     sum = 0.0;
1254   Scalar     *v;
1255 
1256   if (aij->size == 1) {
1257     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1258   } else {
1259     if (type == NORM_FROBENIUS) {
1260       v = amat->a;
1261       for (i=0; i<amat->nz; i++ ) {
1262 #if defined(PETSC_COMPLEX)
1263         sum += real(conj(*v)*(*v)); v++;
1264 #else
1265         sum += (*v)*(*v); v++;
1266 #endif
1267       }
1268       v = bmat->a;
1269       for (i=0; i<bmat->nz; i++ ) {
1270 #if defined(PETSC_COMPLEX)
1271         sum += real(conj(*v)*(*v)); v++;
1272 #else
1273         sum += (*v)*(*v); v++;
1274 #endif
1275       }
1276       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1277       *norm = sqrt(*norm);
1278     }
1279     else if (type == NORM_1) { /* max column norm */
1280       double *tmp, *tmp2;
1281       int    *jj, *garray = aij->garray;
1282       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1283       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1284       PetscMemzero(tmp,aij->N*sizeof(double));
1285       *norm = 0.0;
1286       v = amat->a; jj = amat->j;
1287       for ( j=0; j<amat->nz; j++ ) {
1288         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1289       }
1290       v = bmat->a; jj = bmat->j;
1291       for ( j=0; j<bmat->nz; j++ ) {
1292         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1293       }
1294       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1295       for ( j=0; j<aij->N; j++ ) {
1296         if (tmp2[j] > *norm) *norm = tmp2[j];
1297       }
1298       PetscFree(tmp); PetscFree(tmp2);
1299     }
1300     else if (type == NORM_INFINITY) { /* max row norm */
1301       double ntemp = 0.0;
1302       for ( j=0; j<amat->m; j++ ) {
1303         v = amat->a + amat->i[j] + shift;
1304         sum = 0.0;
1305         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1306           sum += PetscAbsScalar(*v); v++;
1307         }
1308         v = bmat->a + bmat->i[j] + shift;
1309         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1310           sum += PetscAbsScalar(*v); v++;
1311         }
1312         if (sum > ntemp) ntemp = sum;
1313       }
1314       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1315     }
1316     else {
1317       SETERRQ(1,0,"No support for two norm");
1318     }
1319   }
1320   return 0;
1321 }
1322 
1323 #undef __FUNC__
1324 #define __FUNC__ "MatTranspose_MPIAIJ"
1325 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1326 {
1327   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1328   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1329   int        ierr,shift = Aloc->indexshift;
1330   Mat        B;
1331   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1332   Scalar     *array;
1333 
1334   if (matout == PETSC_NULL && M != N)
1335     SETERRQ(1,0,"Square matrix only for in-place");
1336   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1337          PETSC_NULL,&B); CHKERRQ(ierr);
1338 
1339   /* copy over the A part */
1340   Aloc = (Mat_SeqAIJ*) a->A->data;
1341   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1342   row = a->rstart;
1343   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1344   for ( i=0; i<m; i++ ) {
1345     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1346     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1347   }
1348   aj = Aloc->j;
1349   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1350 
1351   /* copy over the B part */
1352   Aloc = (Mat_SeqAIJ*) a->B->data;
1353   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1354   row = a->rstart;
1355   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1356   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1357   for ( i=0; i<m; i++ ) {
1358     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1359     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1360   }
1361   PetscFree(ct);
1362   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1363   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1364   if (matout != PETSC_NULL) {
1365     *matout = B;
1366   } else {
1367     /* This isn't really an in-place transpose, but we free the data structures from a */
1368     PetscFree(a->rowners);
1369     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1370     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1371     if (a->colmap) PetscFree(a->colmap);
1372     if (a->garray) PetscFree(a->garray);
1373     if (a->lvec) VecDestroy(a->lvec);
1374     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1375     PetscFree(a);
1376     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1377     PetscHeaderDestroy(B);
1378   }
1379   return 0;
1380 }
1381 
1382 #undef __FUNC__
1383 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1384 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1385 {
1386   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1387   Mat a = aij->A, b = aij->B;
1388   int ierr,s1,s2,s3;
1389 
1390   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1391   if (rr) {
1392     s3 = aij->n;
1393     VecGetLocalSize_Fast(rr,s1);
1394     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1395     /* Overlap communication with computation. */
1396     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1397   }
1398   if (ll) {
1399     VecGetLocalSize_Fast(ll,s1);
1400     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1401     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1402   }
1403   /* scale  the diagonal block */
1404   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1405 
1406   if (rr) {
1407     /* Do a scatter end and then right scale the off-diagonal block */
1408     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1409     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1410   }
1411 
1412   return 0;
1413 }
1414 
1415 
1416 extern int MatPrintHelp_SeqAIJ(Mat);
1417 #undef __FUNC__
1418 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1419 int MatPrintHelp_MPIAIJ(Mat A)
1420 {
1421   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1422 
1423   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1424   else return 0;
1425 }
1426 
1427 #undef __FUNC__
1428 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1429 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1430 {
1431   *bs = 1;
1432   return 0;
1433 }
1434 #undef __FUNC__
1435 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1436 int MatSetUnfactored_MPIAIJ(Mat A)
1437 {
1438   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1439   int        ierr;
1440   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1441   return 0;
1442 }
1443 
1444 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1445 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1446 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1447 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1448 /* -------------------------------------------------------------------*/
1449 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1450        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1451        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1452        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1453        0,0,
1454        0,0,
1455        0,0,
1456        MatRelax_MPIAIJ,
1457        MatTranspose_MPIAIJ,
1458        MatGetInfo_MPIAIJ,0,
1459        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1460        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1461        0,
1462        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1463        0,0,0,0,
1464        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1465        0,0,
1466        0,0,MatConvertSameType_MPIAIJ,0,0,
1467        0,0,0,
1468        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1469        MatPrintHelp_MPIAIJ,
1470        MatScale_MPIAIJ,0,0,0,
1471        MatGetBlockSize_MPIAIJ,0,0,0,0,
1472        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1473 
1474 
1475 #undef __FUNC__
1476 #define __FUNC__ "MatCreateMPIAIJ"
1477 /*@C
1478    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1479    (the default parallel PETSc format).  For good matrix assembly performance
1480    the user should preallocate the matrix storage by setting the parameters
1481    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1482    performance can be increased by more than a factor of 50.
1483 
1484    Input Parameters:
1485 .  comm - MPI communicator
1486 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1487            This value should be the same as the local size used in creating the
1488            y vector for the matrix-vector product y = Ax.
1489 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1490        This value should be the same as the local size used in creating the
1491        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1492 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1493 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1494 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1495            (same for all local rows)
1496 .  d_nnz - array containing the number of nonzeros in the various rows of the
1497            diagonal portion of the local submatrix (possibly different for each row)
1498            or PETSC_NULL. You must leave room for the diagonal entry even if
1499            it is zero.
1500 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1501            submatrix (same for all local rows).
1502 .  o_nnz - array containing the number of nonzeros in the various rows of the
1503            off-diagonal portion of the local submatrix (possibly different for
1504            each row) or PETSC_NULL.
1505 
1506    Output Parameter:
1507 .  A - the matrix
1508 
1509    Notes:
1510    The AIJ format (also called the Yale sparse matrix format or
1511    compressed row storage), is fully compatible with standard Fortran 77
1512    storage.  That is, the stored row and column indices can begin at
1513    either one (as in Fortran) or zero.  See the users manual for details.
1514 
1515    The user MUST specify either the local or global matrix dimensions
1516    (possibly both).
1517 
1518    By default, this format uses inodes (identical nodes) when possible.
1519    We search for consecutive rows with the same nonzero structure, thereby
1520    reusing matrix information to achieve increased efficiency.
1521 
1522    Options Database Keys:
1523 $    -mat_aij_no_inode  - Do not use inodes
1524 $    -mat_aij_inode_limit <limit> - Set inode limit.
1525 $        (max limit=5)
1526 $    -mat_aij_oneindex - Internally use indexing starting at 1
1527 $        rather than 0.  Note: When calling MatSetValues(),
1528 $        the user still MUST index entries starting at 0!
1529 
1530    Storage Information:
1531    For a square global matrix we define each processor's diagonal portion
1532    to be its local rows and the corresponding columns (a square submatrix);
1533    each processor's off-diagonal portion encompasses the remainder of the
1534    local matrix (a rectangular submatrix).
1535 
1536    The user can specify preallocated storage for the diagonal part of
1537    the local submatrix with either d_nz or d_nnz (not both).  Set
1538    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1539    memory allocation.  Likewise, specify preallocated storage for the
1540    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1541 
1542    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1543    the figure below we depict these three local rows and all columns (0-11).
1544 
1545 $            0 1 2 3 4 5 6 7 8 9 10 11
1546 $          --------------------------
1547 $  row 3  |  o o o d d d o o o o o o
1548 $  row 4  |  o o o d d d o o o o o o
1549 $  row 5  |  o o o d d d o o o o o o
1550 $          --------------------------
1551 $
1552 
1553    Thus, any entries in the d locations are stored in the d (diagonal)
1554    submatrix, and any entries in the o locations are stored in the
1555    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1556    stored simply in the MATSEQAIJ format for compressed row storage.
1557 
1558    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1559    and o_nz should indicate the number of nonzeros per row in the o matrix.
1560    In general, for PDE problems in which most nonzeros are near the diagonal,
1561    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1562    or you will get TERRIBLE performance; see the users' manual chapter on
1563    matrices.
1564 
1565 .keywords: matrix, aij, compressed row, sparse, parallel
1566 
1567 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1568 @*/
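/* A minimal calling sketch (the preallocation values 5 and 2 are purely illustrative;
   real codes should supply problem-specific d_nz/d_nnz and o_nz/o_nnz):

       Mat A;
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                              5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
*/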
1569 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1570                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1571 {
1572   Mat          B;
1573   Mat_MPIAIJ   *b;
1574   int          ierr, i,sum[2],work[2],size;
1575 
1576   *A = 0;
1577   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1578   PLogObjectCreate(B);
1579   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1580   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1581   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1582   MPI_Comm_size(comm,&size);
1583   if (size == 1) {
1584     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1585     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1586     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1587     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1588     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1589     B->ops.solve             = MatSolve_MPIAIJ;
1590     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1591     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1592     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1593     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1594   }
1595   B->destroy    = MatDestroy_MPIAIJ;
1596   B->view       = MatView_MPIAIJ;
1597   B->factor     = 0;
1598   B->assembled  = PETSC_FALSE;
1599   B->mapping    = 0;
1600 
1601   B->insertmode = NOT_SET_VALUES;
1602   MPI_Comm_rank(comm,&b->rank);
1603   MPI_Comm_size(comm,&b->size);
1604 
1605   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1606     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1607 
1608   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1609     work[0] = m; work[1] = n;
1610     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1611     if (M == PETSC_DECIDE) M = sum[0];
1612     if (N == PETSC_DECIDE) N = sum[1];
1613   }
1614   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1615   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1616   b->m = m; B->m = m;
1617   b->n = n; B->n = n;
1618   b->N = N; B->N = N;
1619   b->M = M; B->M = M;
1620 
1621   /* build local table of row and column ownerships */
1622   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1623   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1624   b->cowners = b->rowners + b->size + 2;
1625   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
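  /* turn the gathered per-process row counts into a prefix sum, so that
     rowners[rank] .. rowners[rank+1] brackets this process's global rows
     (the column ownerships below are built the same way) */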
1626   b->rowners[0] = 0;
1627   for ( i=2; i<=b->size; i++ ) {
1628     b->rowners[i] += b->rowners[i-1];
1629   }
1630   b->rstart = b->rowners[b->rank];
1631   b->rend   = b->rowners[b->rank+1];
1632   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1633   b->cowners[0] = 0;
1634   for ( i=2; i<=b->size; i++ ) {
1635     b->cowners[i] += b->cowners[i-1];
1636   }
1637   b->cstart = b->cowners[b->rank];
1638   b->cend   = b->cowners[b->rank+1];
1639 
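  /* create the two local blocks: b->A holds the square "diagonal" part (local rows by
     local columns) and b->B holds the off-diagonal part, allocated here with the full
     global column width; its columns are compressed to the nonzero ones (garray) at assembly */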
1640   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1641   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1642   PLogObjectParent(B,b->A);
1643   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1644   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1645   PLogObjectParent(B,b->B);
1646 
1647   /* build cache for off array entries formed */
1648   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1649   b->donotstash  = 0;
1650   b->colmap      = 0;
1651   b->garray      = 0;
1652   b->roworiented = 1;
1653 
1654   /* stuff used for matrix vector multiply */
1655   b->lvec      = 0;
1656   b->Mvctx     = 0;
1657 
1658   /* stuff for MatGetRow() */
1659   b->rowindices   = 0;
1660   b->rowvalues    = 0;
1661   b->getrowactive = PETSC_FALSE;
1662 
1663   *A = B;
1664   return 0;
1665 }
1666 
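/*
    Duplicates a parallel AIJ matrix: copies the ownership tables, column map and
    garray, clones the scatter context used for matrix-vector products, and converts
    the local diagonal and off-diagonal blocks rather than rebuilding them from scratch.
*/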
1667 #undef __FUNC__
1668 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1669 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1670 {
1671   Mat        mat;
1672   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1673   int        ierr, len=0, flg;
1674 
1675   *newmat       = 0;
1676   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1677   PLogObjectCreate(mat);
1678   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1679   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1680   mat->destroy    = MatDestroy_MPIAIJ;
1681   mat->view       = MatView_MPIAIJ;
1682   mat->factor     = matin->factor;
1683   mat->assembled  = PETSC_TRUE;
1684 
1685   a->m = mat->m   = oldmat->m;
1686   a->n = mat->n   = oldmat->n;
1687   a->M = mat->M   = oldmat->M;
1688   a->N = mat->N   = oldmat->N;
1689 
1690   a->rstart       = oldmat->rstart;
1691   a->rend         = oldmat->rend;
1692   a->cstart       = oldmat->cstart;
1693   a->cend         = oldmat->cend;
1694   a->size         = oldmat->size;
1695   a->rank         = oldmat->rank;
1696   mat->insertmode = NOT_SET_VALUES;
1697   a->rowvalues    = 0;
1698   a->getrowactive = PETSC_FALSE;
1699 
1700   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1701   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1702   a->cowners = a->rowners + a->size + 2;
1703   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1704   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1705   if (oldmat->colmap) {
1706     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1707     PLogObjectMemory(mat,(a->N)*sizeof(int));
1708     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1709   } else a->colmap = 0;
1710   if (oldmat->garray) {
1711     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1712     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1713     PLogObjectMemory(mat,len*sizeof(int));
1714     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1715   } else a->garray = 0;
1716 
1717   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1718   PLogObjectParent(mat,a->lvec);
1719   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1720   PLogObjectParent(mat,a->Mvctx);
1721   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1722   PLogObjectParent(mat,a->A);
1723   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1724   PLogObjectParent(mat,a->B);
1725   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1726   if (flg) {
1727     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1728   }
1729   *newmat = mat;
1730   return 0;
1731 }
1732 
1733 #include "sys.h"
1734 
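/*
    Loads a parallel AIJ matrix from a PETSc binary viewer.  The code below assumes
    the usual PETSc binary layout: a header of four ints (cookie, M, N, and a fourth
    entry that is not used here), then all row lengths, then all column indices, then
    all numerical values.  Process 0 reads the file and ships each process its share.
*/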
1735 #undef __FUNC__
1736 #define __FUNC__ "MatLoad_MPIAIJ"
1737 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1738 {
1739   Mat          A;
1740   int          i, nz, ierr, j,rstart, rend, fd;
1741   Scalar       *vals,*svals;
1742   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1743   MPI_Status   status;
1744   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1745   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1746   int          tag = ((PetscObject)viewer)->tag;
1747 
1748   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1749   if (!rank) {
1750     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1751     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1752     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object in file");
1753   }
1754 
1755   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1756   M = header[1]; N = header[2];
1757   /* determine ownership of all rows */
1758   m = M/size + ((M % size) > rank);
1759   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1760   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1761   rowners[0] = 0;
1762   for ( i=2; i<=size; i++ ) {
1763     rowners[i] += rowners[i-1];
1764   }
1765   rstart = rowners[rank];
1766   rend   = rowners[rank+1];
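  /* this process will own global rows rstart .. rend-1 */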
1767 
1768   /* distribute row lengths to all processors */
1769   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1770   offlens = ourlens + (rend-rstart);
1771   if (!rank) {
1772     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1773     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1774     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1775     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1776     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1777     PetscFree(sndcounts);
1778   }
1779   else {
1780     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1781   }
1782 
1783   if (!rank) {
1784     /* calculate the number of nonzeros on each processor */
1785     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1786     PetscMemzero(procsnz,size*sizeof(int));
1787     for ( i=0; i<size; i++ ) {
1788       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1789         procsnz[i] += rowlengths[j];
1790       }
1791     }
1792     PetscFree(rowlengths);
1793 
1794     /* determine max buffer needed and allocate it */
1795     maxnz = 0;
1796     for ( i=0; i<size; i++ ) {
1797       maxnz = PetscMax(maxnz,procsnz[i]);
1798     }
1799     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1800 
1801     /* read in my part of the matrix column indices  */
1802     nz = procsnz[0];
1803     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1804     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1805 
1806     /* read in everyone else's column indices and ship them off */
1807     for ( i=1; i<size; i++ ) {
1808       nz = procsnz[i];
1809       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1810       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1811     }
1812     PetscFree(cols);
1813   }
1814   else {
1815     /* determine buffer space needed for message */
1816     nz = 0;
1817     for ( i=0; i<m; i++ ) {
1818       nz += ourlens[i];
1819     }
1820     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1821 
1822     /* receive message of column indices*/
1823     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1824     MPI_Get_count(&status,MPI_INT,&maxnz);
1825     if (maxnz != nz) SETERRQ(1,0,"received wrong number of column indices: file may be corrupt");
1826   }
1827 
1828   /* loop over local rows, determining number of off diagonal entries */
1829   PetscMemzero(offlens,m*sizeof(int));
1830   jj = 0;
1831   for ( i=0; i<m; i++ ) {
1832     for ( j=0; j<ourlens[i]; j++ ) {
1833       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1834       jj++;
1835     }
1836   }
1837 
1838   /* create our matrix: temporarily reduce ourlens to the diagonal-block row lengths so both blocks can be preallocated exactly, then restore it below */
1839   for ( i=0; i<m; i++ ) {
1840     ourlens[i] -= offlens[i];
1841   }
1842   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1843   A = *newmat;
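  /* the binary format is expected to store each row's column indices in ascending order,
     so let MatSetValues() exploit that */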
1844   MatSetOption(A,MAT_COLUMNS_SORTED);
1845   for ( i=0; i<m; i++ ) {
1846     ourlens[i] += offlens[i];
1847   }
1848 
1849   if (!rank) {
1850     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1851 
1852     /* read in my part of the matrix numerical values  */
1853     nz = procsnz[0];
1854     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1855 
1856     /* insert into matrix */
1857     jj      = rstart;
1858     smycols = mycols;
1859     svals   = vals;
1860     for ( i=0; i<m; i++ ) {
1861       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1862       smycols += ourlens[i];
1863       svals   += ourlens[i];
1864       jj++;
1865     }
1866 
1867     /* read in other processors and ship out */
1868     for ( i=1; i<size; i++ ) {
1869       nz = procsnz[i];
1870       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1871       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1872     }
1873     PetscFree(procsnz);
1874   }
1875   else {
1876     /* receive numeric values */
1877     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1878 
1879     /* receive message of values*/
1880     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1881     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1882     if (maxnz != nz) SETERRQ(1,0,"received wrong number of values: file may be corrupt");
1883 
1884     /* insert into matrix */
1885     jj      = rstart;
1886     smycols = mycols;
1887     svals   = vals;
1888     for ( i=0; i<m; i++ ) {
1889       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1890       smycols += ourlens[i];
1891       svals   += ourlens[i];
1892       jj++;
1893     }
1894   }
1895   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1896 
1897   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1898   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1899   return 0;
1900 }
1901