xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1de57adc8a1ca54985b5e97fb2fb4832b3ccb382)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.207 1997/05/23 18:38:33 balay Exp balay $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way, since the
13 length of colmap equals the global number of columns.
14 */
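/*
   For example, if the off-diagonal block B holds global columns {3,17,42}
   (stored locally as columns 0,1,2), then colmap[3] = 1, colmap[17] = 2,
   colmap[42] = 3, and colmap[j] = 0 for every other global column j; a
   lookup that returns 0 therefore means "this global column is not present
   locally".
*/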
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
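/*
   The following macro inserts (or, for ADD_VALUES, adds) "value" at local
   position (row,col) of the diagonal block a.  An existing nonzero slot is
   reused when one is found; otherwise, if the row has no free space left,
   the i/j/a arrays are reallocated with CHUNKSIZE extra entries for this
   row and all later entries are shifted to make room.
*/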
59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = aimax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66       for ( _i=0; _i<nrow; _i++ ) { \
67         if (rp[_i] > col1) break; \
68         if (rp[_i] == col1) { \
69           if (addv == ADD_VALUES) ap[_i] += value;   \
70           else                  ap[_i] = value; \
71           goto a_noinsert; \
72         } \
73       }  \
74       if (nonew == 1) goto a_noinsert; \
75       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
76       if (nrow >= rmax) { \
77         /* there is no extra room in row, therefore enlarge */ \
78         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
79         Scalar *new_a; \
80  \
81         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
82  \
83         /* malloc new storage space */ \
84         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
85         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
86         new_j   = (int *) (new_a + new_nz); \
87         new_i   = new_j + new_nz; \
88  \
89         /* copy over old data into new slots */ \
90         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
91         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
92         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
93         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
94         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
95                                                            len*sizeof(int)); \
96         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
97         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
98                                                            len*sizeof(Scalar));  \
99         /* free up old matrix storage */ \
100  \
101         PetscFree(a->a);  \
102         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
103         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
104         a->singlemalloc = 1; \
105  \
106         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
107         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
108         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
109         a->maxnz += CHUNKSIZE; \
110         a->reallocs++; \
111       } \
112       N = nrow++ - 1; a->nz++; \
113       /* shift up all the later entries in this row */ \
114       for ( ii=N; ii>=_i; ii-- ) { \
115         rp[ii+1] = rp[ii]; \
116         ap[ii+1] = ap[ii]; \
117       } \
118       rp[_i] = col1;  \
119       ap[_i] = value;  \
120       a_noinsert: ; \
121       ailen[row] = nrow; \
122 }
123 
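/* Identical to MatSetValues_SeqAIJ_A_Private(), but operates on the off-diagonal block b */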
124 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
125 { \
126  \
127     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
128     rmax = bimax[row]; nrow = bilen[row];  \
129     col1 = col - shift; \
130      \
131       for ( _i=0; _i<nrow; _i++ ) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
144         Scalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
150         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
151         new_j   = (int *) (new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
156         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int)); \
161         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
162         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(Scalar));  \
164         /* free up old matrix storage */ \
165  \
166         PetscFree(b->a);  \
167         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
168         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
169         b->singlemalloc = 1; \
170  \
171         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
172         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
173         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
174         b->maxnz += CHUNKSIZE; \
175         b->reallocs++; \
176       } \
177       N = nrow++ - 1; b->nz++; \
178       /* shift up all the later entries in this row */ \
179       for ( ii=N; ii>=_i; ii-- ) { \
180         rp[ii+1] = rp[ii]; \
181         ap[ii+1] = ap[ii]; \
182       } \
183       rp[_i] = col1;  \
184       ap[_i] = value;  \
185       b_noinsert: ; \
186       bilen[row] = nrow; \
187 }
188 
189 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIAIJ"
192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   Scalar     value;
196   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197   int        cstart = aij->cstart, cend = aij->cend,row,col;
198   int        roworiented = aij->roworiented;
199 
200   /* Some Variables required in the macro */
201   Mat        A = aij->A;
202   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
203   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
204   Scalar     *aa = a->a;
205 
206   Mat        B = aij->B;
207   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
208   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
209   Scalar     *ba = b->a;
210 
211   int        *rp,ii,nrow,_i,rmax, N, col1;
212   int        nonew = a->nonew,shift = a->indexshift;
213   Scalar     *ap;
214 
215   for ( i=0; i<m; i++ ) {
216 #if defined(PETSC_BOPT_g)
217     if (im[i] < 0) SETERRQ(1,0,"Negative row");
218     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
219 #endif
220     if (im[i] >= rstart && im[i] < rend) {
221       row = im[i] - rstart;
222       for ( j=0; j<n; j++ ) {
223         if (in[j] >= cstart && in[j] < cend){
224           col = in[j] - cstart;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         }
229 #if defined(PETSC_BOPT_g)
230         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
231         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238             col = aij->colmap[in[j]] - 1;
239             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
241               col =  in[j];
242               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
243               B = aij->B;
244               b = (Mat_SeqAIJ *) B->data;
245               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
246               ba = b->a;
247             }
248           }
249           else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     }
256     else {
257       if (roworiented && !aij->donotstash) {
258         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
259       }
260       else {
261         if (!aij->donotstash) {
262           row = im[i];
263           for ( j=0; j<n; j++ ) {
264             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
265           }
266         }
267       }
268     }
269   }
270   return 0;
271 }
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatGetValues_MPIAIJ"
275 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
276 {
277   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
278   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
279   int        cstart = aij->cstart, cend = aij->cend,row,col;
280 
281   for ( i=0; i<m; i++ ) {
282     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
283     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for ( j=0; j<n; j++ ) {
287         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
288         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
292         }
293         else {
294           if (!aij->colmap) {
295             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
296           }
297           col = aij->colmap[idxn[j]] - 1;
298           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
299           else {
300             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
301           }
302         }
303       }
304     }
305     else {
306       SETERRQ(1,0,"Only local values currently supported");
307     }
308   }
309   return 0;
310 }
311 
312 #undef __FUNC__
313 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
314 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
315 {
316   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
317   MPI_Comm    comm = mat->comm;
318   int         size = aij->size, *owners = aij->rowners;
319   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
320   MPI_Request *send_waits,*recv_waits;
321   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
322   InsertMode  addv;
323   Scalar      *rvalues,*svalues;
324 
325   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
326   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
327   if (addv == (ADD_VALUES|INSERT_VALUES)) {
328     SETERRQ(1,0,"Some processors inserted others added");
329   }
330   mat->insertmode = addv; /* in case this processor had no cache */
331 
332   /*  first count number of contributors to each processor */
333   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
334   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
335   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
336   for ( i=0; i<aij->stash.n; i++ ) {
337     idx = aij->stash.idx[i];
338     for ( j=0; j<size; j++ ) {
339       if (idx >= owners[j] && idx < owners[j+1]) {
340         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
341       }
342     }
343   }
344   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
345 
346   /* inform other processors of number of messages and max length*/
347   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
348   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
349   nreceives = work[rank];
350   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
351   nmax = work[rank];
352   PetscFree(work);
353 
354   /* post receives:
355        1) each message will consist of ordered triples
356      (global row index,global column index,value); we store the global
357      indices as Scalars to simplify the message passing.
358        2) since we don't know how long each individual message is we
359      allocate the largest needed buffer for each receive. Potentially
360      this is a lot of wasted space.
361 
362 
363        This could be done better.
364   */
365   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
366   CHKPTRQ(rvalues);
367   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
368   CHKPTRQ(recv_waits);
369   for ( i=0; i<nreceives; i++ ) {
370     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
371               comm,recv_waits+i);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues for stuff going to
376          the ith processor
377   */
378   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
379   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
380   CHKPTRQ(send_waits);
381   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
382   starts[0] = 0;
383   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
384   for ( i=0; i<aij->stash.n; i++ ) {
385     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
386     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
387     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
388   }
389   PetscFree(owner);
390   starts[0] = 0;
391   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
392   count = 0;
393   for ( i=0; i<size; i++ ) {
394     if (procs[i]) {
395       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
396                 comm,send_waits+count++);
397     }
398   }
399   PetscFree(starts); PetscFree(nprocs);
400 
401   /* Free cache space */
402   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
403   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
404 
405   aij->svalues    = svalues;    aij->rvalues    = rvalues;
406   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
407   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
408   aij->rmax       = nmax;
409 
410   return 0;
411 }
412 extern int MatSetUpMultiply_MPIAIJ(Mat);
413 
414 #undef __FUNC__
415 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
416 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
417 {
418   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
419   MPI_Status  *send_status,recv_status;
420   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
421   int         row,col,other_disassembled;
422   Scalar      *values,val;
423   InsertMode  addv = mat->insertmode;
424 
425   /*  wait on receives */
426   while (count) {
427     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
428     /* unpack receives into our local space */
429     values = aij->rvalues + 3*imdex*aij->rmax;
430     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
431     n = n/3;
432     for ( i=0; i<n; i++ ) {
433       row = (int) PetscReal(values[3*i]) - aij->rstart;
434       col = (int) PetscReal(values[3*i+1]);
435       val = values[3*i+2];
436       if (col >= aij->cstart && col < aij->cend) {
437         col -= aij->cstart;
438         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
439       }
440       else {
441         if (mat->was_assembled || mat->assembled) {
442           if (!aij->colmap) {
443             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
444           }
445           col = aij->colmap[col] - 1;
446           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
447             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
448             col = (int) PetscReal(values[3*i+1]);
449           }
450         }
451         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
452       }
453     }
454     count--;
455   }
456   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
457 
458   /* wait on sends */
459   if (aij->nsends) {
460     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
461     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
462     PetscFree(send_status);
463   }
464   PetscFree(aij->send_waits); PetscFree(aij->svalues);
465 
466   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
467   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
468 
469   /* determine if any processor has disassembled; if so we must
470      also disassemble ourselves, so that we may reassemble. */
471   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
472   if (mat->was_assembled && !other_disassembled) {
473     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
474   }
475 
476   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
477     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
478   }
479   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
480   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
481 
482   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
483   return 0;
484 }
485 
486 #undef __FUNC__
487 #define __FUNC__ "MatZeroEntries_MPIAIJ"
488 int MatZeroEntries_MPIAIJ(Mat A)
489 {
490   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
491   int        ierr;
492   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
493   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
494   return 0;
495 }
496 
497 /* the code does not do the diagonal entries correctly unless the
498    matrix is square and the column and row ownerships are identical.
499    This is a BUG. The only way to fix it seems to be to access
500    aij->A and aij->B directly and not through the MatZeroRows()
501    routine.
502 */
503 #undef __FUNC__
504 #define __FUNC__ "MatZeroRows_MPIAIJ"
505 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
506 {
507   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
508   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
509   int            *procs,*nprocs,j,found,idx,nsends,*work;
510   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
511   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
512   int            *lens,imdex,*lrows,*values;
513   MPI_Comm       comm = A->comm;
514   MPI_Request    *send_waits,*recv_waits;
515   MPI_Status     recv_status,*send_status;
516   IS             istmp;
517 
518   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
519   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
520 
521   /*  first count number of contributors to each processor */
522   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
523   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
524   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
525   for ( i=0; i<N; i++ ) {
526     idx = rows[i];
527     found = 0;
528     for ( j=0; j<size; j++ ) {
529       if (idx >= owners[j] && idx < owners[j+1]) {
530         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
531       }
532     }
533     if (!found) SETERRQ(1,0,"Index out of range");
534   }
535   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
536 
537   /* inform other processors of number of messages and max length*/
538   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
539   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
540   nrecvs = work[rank];
541   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
542   nmax = work[rank];
543   PetscFree(work);
544 
545   /* post receives:   */
546   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
547   CHKPTRQ(rvalues);
548   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
549   CHKPTRQ(recv_waits);
550   for ( i=0; i<nrecvs; i++ ) {
551     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
552   }
553 
554   /* do sends:
555       1) starts[i] gives the starting index in svalues for stuff going to
556          the ith processor
557   */
558   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
559   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
560   CHKPTRQ(send_waits);
561   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
562   starts[0] = 0;
563   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
564   for ( i=0; i<N; i++ ) {
565     svalues[starts[owner[i]]++] = rows[i];
566   }
567   ISRestoreIndices(is,&rows);
568 
569   starts[0] = 0;
570   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
571   count = 0;
572   for ( i=0; i<size; i++ ) {
573     if (procs[i]) {
574       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
575     }
576   }
577   PetscFree(starts);
578 
579   base = owners[rank];
580 
581   /*  wait on receives */
582   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
583   source = lens + nrecvs;
584   count  = nrecvs; slen = 0;
585   while (count) {
586     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
587     /* unpack receives into our local space */
588     MPI_Get_count(&recv_status,MPI_INT,&n);
589     source[imdex]  = recv_status.MPI_SOURCE;
590     lens[imdex]  = n;
591     slen += n;
592     count--;
593   }
594   PetscFree(recv_waits);
595 
596   /* move the data into the send scatter */
597   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
598   count = 0;
599   for ( i=0; i<nrecvs; i++ ) {
600     values = rvalues + i*nmax;
601     for ( j=0; j<lens[i]; j++ ) {
602       lrows[count++] = values[j] - base;
603     }
604   }
605   PetscFree(rvalues); PetscFree(lens);
606   PetscFree(owner); PetscFree(nprocs);
607 
608   /* actually zap the local rows */
609   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
610   PLogObjectParent(A,istmp);
611   PetscFree(lrows);
612   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
613   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
614   ierr = ISDestroy(istmp); CHKERRQ(ierr);
615 
616   /* wait on sends */
617   if (nsends) {
618     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
619     CHKPTRQ(send_status);
620     MPI_Waitall(nsends,send_waits,send_status);
621     PetscFree(send_status);
622   }
623   PetscFree(send_waits); PetscFree(svalues);
624 
625   return 0;
626 }
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatMult_MPIAIJ"
630 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
631 {
632   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
633   int        ierr,nt;
634 
635   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
636   if (nt != a->n) {
637     SETERRQ(1,0,"Incompatible partition of A and xx");
638   }
639   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
640   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
641   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
642   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
643   return 0;
644 }
645 
646 #undef __FUNC__
647 #define __FUNC__ "MatMultAdd_MPIAIJ"
648 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
649 {
650   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
651   int        ierr;
652   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
653   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
654   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
655   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
656   return 0;
657 }
658 
659 #undef __FUNC__
660 #define __FUNC__ "MatMultTrans_MPIAIJ"
661 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
662 {
663   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
664   int        ierr;
665 
666   /* do nondiagonal part */
667   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
668   /* send it on its way */
669   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
670   /* do local part */
671   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
672   /* receive remote parts: note this assumes the values are not actually */
673   /* added into yy until the VecScatterEnd() below, which is true for this */
674   /* implementation but may not always be true. */
675   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
676   return 0;
677 }
678 
679 #undef __FUNC__
680 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
681 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
682 {
683   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
684   int        ierr;
685 
686   /* do nondiagonal part */
687   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
688   /* send it on its way */
689   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
690   /* do local part */
691   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
692   /* receive remote parts: note this assumes the values are not actually */
693   /* added into zz until the VecScatterEnd() below, which is true for this */
694   /* implementation but may not always be true. */
695   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
696   return 0;
697 }
698 
699 /*
700   This only works correctly for square matrices where the subblock A->A is the
701    diagonal block
702 */
703 #undef __FUNC__
704 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
705 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
706 {
707   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
708   if (a->M != a->N)
709     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
710   if (a->rstart != a->cstart || a->rend != a->cend) {
711     SETERRQ(1,0,"row partition must equal col partition");  }
712   return MatGetDiagonal(a->A,v);
713 }
714 
715 #undef __FUNC__
716 #define __FUNC__ "MatScale_MPIAIJ"
717 int MatScale_MPIAIJ(Scalar *aa,Mat A)
718 {
719   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
720   int        ierr;
721   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
722   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
723   return 0;
724 }
725 
726 #undef __FUNC__
727 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
728 int MatDestroy_MPIAIJ(PetscObject obj)
729 {
730   Mat        mat = (Mat) obj;
731   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
732   int        ierr;
733 #if defined(PETSC_LOG)
734   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
735 #endif
736   PetscFree(aij->rowners);
737   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
738   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
739   if (aij->colmap) PetscFree(aij->colmap);
740   if (aij->garray) PetscFree(aij->garray);
741   if (aij->lvec)   VecDestroy(aij->lvec);
742   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
743   if (aij->rowvalues) PetscFree(aij->rowvalues);
744   PetscFree(aij);
745   if (mat->mapping) {
746     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
747   }
748   PLogObjectDestroy(mat);
749   PetscHeaderDestroy(mat);
750   return 0;
751 }
752 
753 #undef __FUNC__
754 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
755 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
756 {
757   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
758   int         ierr;
759 
760   if (aij->size == 1) {
761     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
762   }
763   else SETERRQ(1,0,"Only uniprocessor output supported");
764   return 0;
765 }
766 
767 #undef __FUNC__
768 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
769 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
770 {
771   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
772   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
773   int         ierr, format,shift = C->indexshift,rank;
774   FILE        *fd;
775   ViewerType  vtype;
776 
777   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
778   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
779     ierr = ViewerGetFormat(viewer,&format);
780     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
781       MatInfo info;
782       int     flg;
783       MPI_Comm_rank(mat->comm,&rank);
784       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
785       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
786       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
787       PetscSequentialPhaseBegin(mat->comm,1);
788       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
789          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
790       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
791          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
792       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
793       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
794       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
795       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
796       fflush(fd);
797       PetscSequentialPhaseEnd(mat->comm,1);
798       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
799       return 0;
800     }
801     else if (format == VIEWER_FORMAT_ASCII_INFO) {
802       return 0;
803     }
804   }
805 
806   if (vtype == DRAW_VIEWER) {
807     Draw       draw;
808     PetscTruth isnull;
809     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
810     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
811   }
812 
813   if (vtype == ASCII_FILE_VIEWER) {
814     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
815     PetscSequentialPhaseBegin(mat->comm,1);
816     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
817            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
818            aij->cend);
819     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
820     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
821     fflush(fd);
822     PetscSequentialPhaseEnd(mat->comm,1);
823   }
824   else {
825     int size = aij->size;
826     rank = aij->rank;
827     if (size == 1) {
828       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
829     }
830     else {
831       /* assemble the entire matrix onto first processor. */
832       Mat         A;
833       Mat_SeqAIJ *Aloc;
834       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
835       Scalar      *a;
836 
837       if (!rank) {
838         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
839                CHKERRQ(ierr);
840       }
841       else {
842         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
843                CHKERRQ(ierr);
844       }
845       PLogObjectParent(mat,A);
846 
847       /* copy over the A part */
848       Aloc = (Mat_SeqAIJ*) aij->A->data;
849       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
850       row = aij->rstart;
851       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
852       for ( i=0; i<m; i++ ) {
853         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
854         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
855       }
856       aj = Aloc->j;
857       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
858 
859       /* copy over the B part */
860       Aloc = (Mat_SeqAIJ*) aij->B->data;
861       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
862       row = aij->rstart;
863       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
864       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
865       for ( i=0; i<m; i++ ) {
866         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
867         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
868       }
869       PetscFree(ct);
870       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
871       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
872       if (!rank) {
873         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
874       }
875       ierr = MatDestroy(A); CHKERRQ(ierr);
876     }
877   }
878   return 0;
879 }
880 
881 #undef __FUNC__
882 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
883 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
884 {
885   Mat         mat = (Mat) obj;
886   int         ierr;
887   ViewerType  vtype;
888 
889   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
890   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
891       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
892     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
893   }
894   else if (vtype == BINARY_FILE_VIEWER) {
895     return MatView_MPIAIJ_Binary(mat,viewer);
896   }
897   return 0;
898 }
899 
900 /*
901     This has to provide several versions, for example:
902 
903      2) a) use only local smoothing, updating outer values only once;
904         b) local smoothing, updating outer values each inner iteration;
905      3) colored smoothing, updating outer values between colors.
906 */
907 #undef __FUNC__
908 #define __FUNC__ "MatRelax_MPIAIJ"
909 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
910                            double fshift,int its,Vec xx)
911 {
912   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
913   Mat        AA = mat->A, BB = mat->B;
914   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
915   Scalar     *b,*x,*xs,*ls,d,*v,sum;
916   int        ierr,*idx, *diag;
917   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
918 
919   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
920   xs = x + shift; /* shift by one for index start of 1 */
921   ls = ls + shift;
922   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
923   diag = A->diag;
924   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
925     if (flag & SOR_ZERO_INITIAL_GUESS) {
926       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
927     }
928     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
929     CHKERRQ(ierr);
930     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
931     CHKERRQ(ierr);
932     while (its--) {
933       /* go down through the rows */
934       for ( i=0; i<m; i++ ) {
935         n    = A->i[i+1] - A->i[i];
936         idx  = A->j + A->i[i] + shift;
937         v    = A->a + A->i[i] + shift;
938         sum  = b[i];
939         SPARSEDENSEMDOT(sum,xs,v,idx,n);
940         d    = fshift + A->a[diag[i]+shift];
941         n    = B->i[i+1] - B->i[i];
942         idx  = B->j + B->i[i] + shift;
943         v    = B->a + B->i[i] + shift;
944         SPARSEDENSEMDOT(sum,ls,v,idx,n);
945         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
946       }
947       /* come up through the rows */
948       for ( i=m-1; i>-1; i-- ) {
949         n    = A->i[i+1] - A->i[i];
950         idx  = A->j + A->i[i] + shift;
951         v    = A->a + A->i[i] + shift;
952         sum  = b[i];
953         SPARSEDENSEMDOT(sum,xs,v,idx,n);
954         d    = fshift + A->a[diag[i]+shift];
955         n    = B->i[i+1] - B->i[i];
956         idx  = B->j + B->i[i] + shift;
957         v    = B->a + B->i[i] + shift;
958         SPARSEDENSEMDOT(sum,ls,v,idx,n);
959         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
960       }
961     }
962   }
963   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
964     if (flag & SOR_ZERO_INITIAL_GUESS) {
965       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
966     }
967     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
968     CHKERRQ(ierr);
969     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
970     CHKERRQ(ierr);
971     while (its--) {
972       for ( i=0; i<m; i++ ) {
973         n    = A->i[i+1] - A->i[i];
974         idx  = A->j + A->i[i] + shift;
975         v    = A->a + A->i[i] + shift;
976         sum  = b[i];
977         SPARSEDENSEMDOT(sum,xs,v,idx,n);
978         d    = fshift + A->a[diag[i]+shift];
979         n    = B->i[i+1] - B->i[i];
980         idx  = B->j + B->i[i] + shift;
981         v    = B->a + B->i[i] + shift;
982         SPARSEDENSEMDOT(sum,ls,v,idx,n);
983         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
984       }
985     }
986   }
987   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
988     if (flag & SOR_ZERO_INITIAL_GUESS) {
989       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
990     }
991     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
992                             mat->Mvctx); CHKERRQ(ierr);
993     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
994                             mat->Mvctx); CHKERRQ(ierr);
995     while (its--) {
996       for ( i=m-1; i>-1; i-- ) {
997         n    = A->i[i+1] - A->i[i];
998         idx  = A->j + A->i[i] + shift;
999         v    = A->a + A->i[i] + shift;
1000         sum  = b[i];
1001         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1002         d    = fshift + A->a[diag[i]+shift];
1003         n    = B->i[i+1] - B->i[i];
1004         idx  = B->j + B->i[i] + shift;
1005         v    = B->a + B->i[i] + shift;
1006         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1007         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1008       }
1009     }
1010   }
1011   else {
1012     SETERRQ(1,0,"Option not supported");
1013   }
1014   return 0;
1015 }
1016 
1017 #undef __FUNC__
1018 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1019 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1020 {
1021   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1022   Mat        A = mat->A, B = mat->B;
1023   int        ierr;
1024   double     isend[5], irecv[5];
1025 
1026   info->rows_global    = (double)mat->M;
1027   info->columns_global = (double)mat->N;
1028   info->rows_local     = (double)mat->m;
1029   info->columns_local  = (double)mat->N;
1030   info->block_size     = 1.0;
1031   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1032   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1033   isend[3] = info->memory;  isend[4] = info->mallocs;
1034   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1035   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1036   isend[3] += info->memory;  isend[4] += info->mallocs;
1037   if (flag == MAT_LOCAL) {
1038     info->nz_used      = isend[0];
1039     info->nz_allocated = isend[1];
1040     info->nz_unneeded  = isend[2];
1041     info->memory       = isend[3];
1042     info->mallocs      = isend[4];
1043   } else if (flag == MAT_GLOBAL_MAX) {
1044     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1045     info->nz_used      = irecv[0];
1046     info->nz_allocated = irecv[1];
1047     info->nz_unneeded  = irecv[2];
1048     info->memory       = irecv[3];
1049     info->mallocs      = irecv[4];
1050   } else if (flag == MAT_GLOBAL_SUM) {
1051     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1052     info->nz_used      = irecv[0];
1053     info->nz_allocated = irecv[1];
1054     info->nz_unneeded  = irecv[2];
1055     info->memory       = irecv[3];
1056     info->mallocs      = irecv[4];
1057   }
1058   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1059   info->fill_ratio_needed = 0;
1060   info->factor_mallocs    = 0;
1061 
1062   return 0;
1063 }
1064 
1065 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1066 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1067 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1068 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1069 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1070 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1071 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1072 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1073 
1074 #undef __FUNC__
1075 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1076 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1077 {
1078   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1079 
1080   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1081       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1082       op == MAT_COLUMNS_UNSORTED ||
1083       op == MAT_COLUMNS_SORTED ||
1084       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1085       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1086         MatSetOption(a->A,op);
1087         MatSetOption(a->B,op);
1088   } else if (op == MAT_ROW_ORIENTED) {
1089     a->roworiented = 1;
1090     MatSetOption(a->A,op);
1091     MatSetOption(a->B,op);
1092   } else if (op == MAT_ROWS_SORTED ||
1093              op == MAT_ROWS_UNSORTED ||
1094              op == MAT_SYMMETRIC ||
1095              op == MAT_STRUCTURALLY_SYMMETRIC ||
1096              op == MAT_YES_NEW_DIAGONALS)
1097     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1098   else if (op == MAT_COLUMN_ORIENTED) {
1099     a->roworiented = 0;
1100     MatSetOption(a->A,op);
1101     MatSetOption(a->B,op);
1102   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1103     a->donotstash = 1;
1104   } else if (op == MAT_NO_NEW_DIAGONALS)
1105     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1106   else
1107     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1108   return 0;
1109 }
1110 
1111 #undef __FUNC__
1112 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1113 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1114 {
1115   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1116   *m = mat->M; *n = mat->N;
1117   return 0;
1118 }
1119 
1120 #undef __FUNC__
1121 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1122 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1123 {
1124   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1125   *m = mat->m; *n = mat->N;
1126   return 0;
1127 }
1128 
1129 #undef __FUNC__
1130 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1131 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1132 {
1133   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1134   *m = mat->rstart; *n = mat->rend;
1135   return 0;
1136 }
1137 
1138 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1139 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatGetRow_MPIAIJ"
1143 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1144 {
1145   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1146   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1147   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1148   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1149   int        *cmap, *idx_p;
1150 
1151   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1152   mat->getrowactive = PETSC_TRUE;
1153 
1154   if (!mat->rowvalues && (idx || v)) {
1155     /*
1156         allocate enough space to hold information from the longest row.
1157     */
1158     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1159     int     max = 1,m = mat->m,tmp;
1160     for ( i=0; i<m; i++ ) {
1161       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1162       if (max < tmp) { max = tmp; }
1163     }
1164     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1165     CHKPTRQ(mat->rowvalues);
1166     mat->rowindices = (int *) (mat->rowvalues + max);
1167   }
1168 
1169   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1170   lrow = row - rstart;
1171 
1172   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1173   if (!v)   {pvA = 0; pvB = 0;}
1174   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1175   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1176   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1177   nztot = nzA + nzB;
1178 
1179   cmap  = mat->garray;
1180   if (v  || idx) {
1181     if (nztot) {
1182       /* Sort by increasing column numbers, assuming A and B already sorted */
1183       int imark = -1;
1184       if (v) {
1185         *v = v_p = mat->rowvalues;
1186         for ( i=0; i<nzB; i++ ) {
1187           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1188           else break;
1189         }
1190         imark = i;
1191         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1192         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1193       }
1194       if (idx) {
1195         *idx = idx_p = mat->rowindices;
1196         if (imark > -1) {
1197           for ( i=0; i<imark; i++ ) {
1198             idx_p[i] = cmap[cworkB[i]];
1199           }
1200         } else {
1201           for ( i=0; i<nzB; i++ ) {
1202             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1203             else break;
1204           }
1205           imark = i;
1206         }
1207         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1208         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1209       }
1210     }
1211     else {
1212       if (idx) *idx = 0;
1213       if (v)   *v   = 0;
1214     }
1215   }
1216   *nz = nztot;
1217   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1218   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1219   return 0;
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1224 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1225 {
1226   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1227   if (aij->getrowactive == PETSC_FALSE) {
1228     SETERRQ(1,0,"MatGetRow not called");
1229   }
1230   aij->getrowactive = PETSC_FALSE;
1231   return 0;
1232 }
1233 
1234 #undef __FUNC__
1235 #define __FUNC__ "MatNorm_MPIAIJ"
1236 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1237 {
1238   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1239   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1240   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1241   double     sum = 0.0;
1242   Scalar     *v;
1243 
1244   if (aij->size == 1) {
1245     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1246   } else {
1247     if (type == NORM_FROBENIUS) {
1248       v = amat->a;
1249       for (i=0; i<amat->nz; i++ ) {
1250 #if defined(PETSC_COMPLEX)
1251         sum += real(conj(*v)*(*v)); v++;
1252 #else
1253         sum += (*v)*(*v); v++;
1254 #endif
1255       }
1256       v = bmat->a;
1257       for (i=0; i<bmat->nz; i++ ) {
1258 #if defined(PETSC_COMPLEX)
1259         sum += real(conj(*v)*(*v)); v++;
1260 #else
1261         sum += (*v)*(*v); v++;
1262 #endif
1263       }
1264       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1265       *norm = sqrt(*norm);
1266     }
1267     else if (type == NORM_1) { /* max column norm */
1268       double *tmp, *tmp2;
1269       int    *jj, *garray = aij->garray;
1270       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1271       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1272       PetscMemzero(tmp,aij->N*sizeof(double));
1273       *norm = 0.0;
1274       v = amat->a; jj = amat->j;
1275       for ( j=0; j<amat->nz; j++ ) {
1276         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1277       }
1278       v = bmat->a; jj = bmat->j;
1279       for ( j=0; j<bmat->nz; j++ ) {
1280         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1281       }
1282       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1283       for ( j=0; j<aij->N; j++ ) {
1284         if (tmp2[j] > *norm) *norm = tmp2[j];
1285       }
1286       PetscFree(tmp); PetscFree(tmp2);
1287     }
1288     else if (type == NORM_INFINITY) { /* max row norm */
1289       double ntemp = 0.0;
1290       for ( j=0; j<amat->m; j++ ) {
1291         v = amat->a + amat->i[j] + shift;
1292         sum = 0.0;
1293         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1294           sum += PetscAbsScalar(*v); v++;
1295         }
1296         v = bmat->a + bmat->i[j] + shift;
1297         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1298           sum += PetscAbsScalar(*v); v++;
1299         }
1300         if (sum > ntemp) ntemp = sum;
1301       }
1302       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1303     }
1304     else {
1305       SETERRQ(1,0,"No support for two norm");
1306     }
1307   }
1308   return 0;
1309 }
1310 
1311 #undef __FUNC__
1312 #define __FUNC__ "MatTranspose_MPIAIJ"
1313 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1314 {
1315   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1316   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1317   int        ierr,shift = Aloc->indexshift;
1318   Mat        B;
1319   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1320   Scalar     *array;
1321 
1322   if (matout == PETSC_NULL && M != N)
1323     SETERRQ(1,0,"Square matrix only for in-place");
1324   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1325          PETSC_NULL,&B); CHKERRQ(ierr);
1326 
1327   /* copy over the A part */
1328   Aloc = (Mat_SeqAIJ*) a->A->data;
1329   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1330   row = a->rstart;
1331   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1332   for ( i=0; i<m; i++ ) {
1333     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1334     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1335   }
1336   aj = Aloc->j;
1337   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1338 
1339   /* copy over the B part */
1340   Aloc = (Mat_SeqAIJ*) a->B->data;
1341   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1342   row = a->rstart;
1343   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1344   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1345   for ( i=0; i<m; i++ ) {
1346     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1347     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1348   }
1349   PetscFree(ct);
1350   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1351   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1352   if (matout != PETSC_NULL) {
1353     *matout = B;
1354   } else {
1355     /* This isn't really an in-place transpose, but we free the data structures from a */
1356     PetscFree(a->rowners);
1357     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1358     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1359     if (a->colmap) PetscFree(a->colmap);
1360     if (a->garray) PetscFree(a->garray);
1361     if (a->lvec) VecDestroy(a->lvec);
1362     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1363     PetscFree(a);
1364     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1365     PetscHeaderDestroy(B);
1366   }
1367   return 0;
1368 }
1369 
1370 #undef __FUNC__
1371 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1372 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1373 {
1374   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1375   Mat a = aij->A, b = aij->B;
1376   int ierr,s1,s2,s3;
1377 
1378   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1379   if (rr) {
1380     s3 = aij->n;
1381     VecGetLocalSize_Fast(rr,s1);
1382     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1383     /* Overlap communication with computation. */
1384     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1385   }
1386   if (ll) {
1387     VecGetLocalSize_Fast(ll,s1);
1388     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1389     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1390   }
1391   /* scale  the diagonal block */
1392   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1393 
1394   if (rr) {
1395     /* Do a scatter end and then right scale the off-diagonal block */
1396     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1397     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1398   }
1399 
1400   return 0;
1401 }
1402 
1403 
1404 extern int MatPrintHelp_SeqAIJ(Mat);
1405 #undef __FUNC__
1406 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1407 int MatPrintHelp_MPIAIJ(Mat A)
1408 {
1409   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1410 
1411   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1412   else return 0;
1413 }
1414 
1415 #undef __FUNC__
1416 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1417 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1418 {
1419   *bs = 1;
1420   return 0;
1421 }
1422 #undef __FUNC__
1423 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1424 int MatSetUnfactored_MPIAIJ(Mat A)
1425 {
1426   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1427   int        ierr;
1428   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1429   return 0;
1430 }
1431 
1432 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1433 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1434 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1435 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1436 /* -------------------------------------------------------------------*/
1437 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1438        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1439        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1440        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1441        0,0,
1442        0,0,
1443        0,0,
1444        MatRelax_MPIAIJ,
1445        MatTranspose_MPIAIJ,
1446        MatGetInfo_MPIAIJ,0,
1447        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1448        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1449        0,
1450        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1451        0,0,0,0,
1452        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1453        0,0,
1454        0,0,MatConvertSameType_MPIAIJ,0,0,
1455        0,0,0,
1456        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1457        MatPrintHelp_MPIAIJ,
1458        MatScale_MPIAIJ,0,0,0,
1459        MatGetBlockSize_MPIAIJ,0,0,0,0,
1460        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1461 
1462 
1463 #undef __FUNC__
1464 #define __FUNC__ "MatCreateMPIAIJ"
1465 /*@C
1466    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1467    (the default parallel PETSc format).  For good matrix assembly performance
1468    the user should preallocate the matrix storage by setting the parameters
1469    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1470    performance can be increased by more than a factor of 50.
1471 
1472    Input Parameters:
1473 .  comm - MPI communicator
1474 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1475            This value should be the same as the local size used in creating the
1476            y vector for the matrix-vector product y = Ax.
1477 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1478        This value should be the same as the local size used in creating the
1479        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1480 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1481 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1482 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1483            (same for all local rows)
1484 .  d_nnz - array containing the number of nonzeros in the various rows of the
1485            diagonal portion of the local submatrix (possibly different for each row)
1486            or PETSC_NULL. You must leave room for the diagonal entry even if
1487            it is zero.
1488 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1489            submatrix (same for all local rows).
1490 .  o_nnz - array containing the number of nonzeros in the various rows of the
1491            off-diagonal portion of the local submatrix (possibly different for
1492            each row) or PETSC_NULL.
1493 
1494    Output Parameter:
1495 .  A - the matrix
1496 
1497    Notes:
1498    The AIJ format (also called the Yale sparse matrix format or
1499    compressed row storage) is fully compatible with standard Fortran 77
1500    storage.  That is, the stored row and column indices can begin at
1501    either one (as in Fortran) or zero.  See the users manual for details.
1502 
1503    The user MUST specify either the local or global matrix dimensions
1504    (possibly both).
1505 
1506    By default, this format uses inodes (identical nodes) when possible.
1507    We search for consecutive rows with the same nonzero structure, thereby
1508    reusing matrix information to achieve increased efficiency.
1509 
1510    Options Database Keys:
1511 $    -mat_aij_no_inode  - Do not use inodes
1512 $    -mat_aij_inode_limit <limit> - Set inode limit.
1513 $        (max limit=5)
1514 $    -mat_aij_oneindex - Internally use indexing starting at 1
1515 $        rather than 0.  Note: When calling MatSetValues(),
1516 $        the user still MUST index entries starting at 0!
1517 
1518    Storage Information:
1519    For a square global matrix we define each processor's diagonal portion
1520    to be its local rows and the corresponding columns (a square submatrix);
1521    each processor's off-diagonal portion encompasses the remainder of the
1522    local matrix (a rectangular submatrix).
1523 
1524    The user can specify preallocated storage for the diagonal part of
1525    the local submatrix with either d_nz or d_nnz (not both).  Set
1526    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1527    memory allocation.  Likewise, specify preallocated storage for the
1528    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1529 
1530    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1531    the figure below we depict these three local rows and all columns (0-11).
1532 
1533 $            0 1 2 3 4 5 6 7 8 9 10 11
1534 $          ---------------------------
1535 $  row 3  |  o o o d d d o o o o o o
1536 $  row 4  |  o o o d d d o o o o o o
1537 $  row 5  |  o o o d d d o o o o o o
1538 $          ---------------------------
1539 $
1540 
1541    Thus, any entries in the d locations are stored in the d (diagonal)
1542    submatrix, and any entries in the o locations are stored in the
1543    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1544    stored simply in the MATSEQAIJ format for compressed row storage.
1545 
1546    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1547    and o_nz should indicate the number of nonzeros per row in the o matrix.
1548    In general, for PDE problems in which most nonzeros are near the diagonal,
1549    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1550    or you will get TERRIBLE performance; see the users' manual chapter on
1551    matrices.
1552 
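   Example:
   As an illustration only, consider the processor depicted above and assume
   the global matrix is square, 12 by 12.  That processor owns 3 local rows
   with 3 nonzeros per row in its diagonal block and 9 per row in its
   off-diagonal block, so (with comm, ierr and A declared by the caller) one
   possible call on that processor is

$      ierr = MatCreateMPIAIJ(comm,3,3,12,12,3,PETSC_NULL,9,PETSC_NULL,&A);

   Exact per-row counts could instead be supplied through the d_nnz and o_nnz
   arrays in place of d_nz and o_nz.
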
1553 .keywords: matrix, aij, compressed row, sparse, parallel
1554 
1555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1556 @*/
1557 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1558                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1559 {
1560   Mat          B;
1561   Mat_MPIAIJ   *b;
1562   int          ierr, i,sum[2],work[2],size;
1563 
1564   *A = 0;
1565   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1566   PLogObjectCreate(B);
1567   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1568   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1569   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1570   MPI_Comm_size(comm,&size);
1571   if (size == 1) {
1572     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1573     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1574     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1575     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1576     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1577     B->ops.solve             = MatSolve_MPIAIJ;
1578     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1579     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1580     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1581     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1582   }
1583   B->destroy    = MatDestroy_MPIAIJ;
1584   B->view       = MatView_MPIAIJ;
1585   B->factor     = 0;
1586   B->assembled  = PETSC_FALSE;
1587   B->mapping    = 0;
1588 
1589   B->insertmode = NOT_SET_VALUES;
1590   MPI_Comm_rank(comm,&b->rank);
1591   MPI_Comm_size(comm,&b->size);
1592 
1593   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1594     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1595 
1596   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1597     work[0] = m; work[1] = n;
1598     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1599     if (M == PETSC_DECIDE) M = sum[0];
1600     if (N == PETSC_DECIDE) N = sum[1];
1601   }
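  /* split the rows (and columns) as evenly as possible: each of the first
     M % size (respectively N % size) processors receives one extra row (column) */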
1602   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1603   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1604   b->m = m; B->m = m;
1605   b->n = n; B->n = n;
1606   b->N = N; B->N = N;
1607   b->M = M; B->M = M;
1608 
1609   /* build local table of row and column ownerships */
1610   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1611   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1612   b->cowners = b->rowners + b->size + 2;
1613   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1614   b->rowners[0] = 0;
1615   for ( i=2; i<=b->size; i++ ) {
1616     b->rowners[i] += b->rowners[i-1];
1617   }
1618   b->rstart = b->rowners[b->rank];
1619   b->rend   = b->rowners[b->rank+1];
1620   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1621   b->cowners[0] = 0;
1622   for ( i=2; i<=b->size; i++ ) {
1623     b->cowners[i] += b->cowners[i-1];
1624   }
1625   b->cstart = b->cowners[b->rank];
1626   b->cend   = b->cowners[b->rank+1];
1627 
1628   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1629   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1630   PLogObjectParent(B,b->A);
1631   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1632   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1633   PLogObjectParent(B,b->B);
1634 
1635   /* build cache (stash) for matrix entries destined for other processors */
1636   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1637   b->donotstash  = 0;
1638   b->colmap      = 0;
1639   b->garray      = 0;
1640   b->roworiented = 1;
1641 
1642   /* stuff used for matrix vector multiply */
1643   b->lvec      = 0;
1644   b->Mvctx     = 0;
1645 
1646   /* stuff for MatGetRow() */
1647   b->rowindices   = 0;
1648   b->rowvalues    = 0;
1649   b->getrowactive = PETSC_FALSE;
1650 
1651   *A = B;
1652   return 0;
1653 }
1654 
1655 #undef __FUNC__
1656 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1657 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1658 {
1659   Mat        mat;
1660   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1661   int        ierr, len=0, flg;
1662 
1663   *newmat       = 0;
1664   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1665   PLogObjectCreate(mat);
1666   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1667   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1668   mat->destroy    = MatDestroy_MPIAIJ;
1669   mat->view       = MatView_MPIAIJ;
1670   mat->factor     = matin->factor;
1671   mat->assembled  = PETSC_TRUE;
1672 
1673   a->m = mat->m   = oldmat->m;
1674   a->n = mat->n   = oldmat->n;
1675   a->M = mat->M   = oldmat->M;
1676   a->N = mat->N   = oldmat->N;
1677 
1678   a->rstart       = oldmat->rstart;
1679   a->rend         = oldmat->rend;
1680   a->cstart       = oldmat->cstart;
1681   a->cend         = oldmat->cend;
1682   a->size         = oldmat->size;
1683   a->rank         = oldmat->rank;
1684   mat->insertmode = NOT_SET_VALUES;
1685   a->rowvalues    = 0;
1686   a->getrowactive = PETSC_FALSE;
1687 
1688   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1689   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1690   a->cowners = a->rowners + a->size + 2;
1691   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1692   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1693   if (oldmat->colmap) {
1694     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1695     PLogObjectMemory(mat,(a->N)*sizeof(int));
1696     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1697   } else a->colmap = 0;
1698   if (oldmat->garray) {
1699     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1700     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1701     PLogObjectMemory(mat,len*sizeof(int));
1702     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1703   } else a->garray = 0;
1704 
1705   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1706   PLogObjectParent(mat,a->lvec);
1707   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1708   PLogObjectParent(mat,a->Mvctx);
1709   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1710   PLogObjectParent(mat,a->A);
1711   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1712   PLogObjectParent(mat,a->B);
1713   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1714   if (flg) {
1715     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1716   }
1717   *newmat = mat;
1718   return 0;
1719 }
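/*
   Usage sketch (an assumption for illustration, not part of the original
   source): a same-type duplicate of a parallel AIJ matrix is normally
   requested through the generic interface, e.g.

       ierr = MatConvert(oldmat,MATSAME,&newmat); CHKERRQ(ierr);

   which is expected to reach the routine above; oldmat, newmat and ierr are
   assumed to be declared by the caller.
*/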
1720 
1721 #include "sys.h"
1722 
1723 #undef __FUNC__
1724 #define __FUNC__ "MatLoad_MPIAIJ"
1725 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1726 {
1727   Mat          A;
1728   int          i, nz, ierr, j,rstart, rend, fd;
1729   Scalar       *vals,*svals;
1730   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1731   MPI_Status   status;
1732   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1733   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1734   int          tag = ((PetscObject)viewer)->tag;
1735 
1736   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1737   if (!rank) {
1738     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1739     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1740     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1741   }
1742 
1743   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1744   M = header[1]; N = header[2];
1745   /* determine ownership of all rows */
1746   m = M/size + ((M % size) > rank);
1747   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1748   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1749   rowners[0] = 0;
1750   for ( i=2; i<=size; i++ ) {
1751     rowners[i] += rowners[i-1];
1752   }
1753   rstart = rowners[rank];
1754   rend   = rowners[rank+1];
1755 
1756   /* distribute row lengths to all processors */
1757   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1758   offlens = ourlens + (rend-rstart);
1759   if (!rank) {
1760     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1761     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1762     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1763     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1764     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1765     PetscFree(sndcounts);
1766   }
1767   else {
1768     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1769   }
1770 
1771   if (!rank) {
1772     /* calculate the number of nonzeros on each processor */
1773     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1774     PetscMemzero(procsnz,size*sizeof(int));
1775     for ( i=0; i<size; i++ ) {
1776       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1777         procsnz[i] += rowlengths[j];
1778       }
1779     }
1780     PetscFree(rowlengths);
1781 
1782     /* determine max buffer needed and allocate it */
1783     maxnz = 0;
1784     for ( i=0; i<size; i++ ) {
1785       maxnz = PetscMax(maxnz,procsnz[i]);
1786     }
1787     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1788 
1789     /* read in my part of the matrix column indices  */
1790     nz = procsnz[0];
1791     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1792     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1793 
1794     /* read in everyone else's and ship it off */
1795     for ( i=1; i<size; i++ ) {
1796       nz = procsnz[i];
1797       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1798       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1799     }
1800     PetscFree(cols);
1801   }
1802   else {
1803     /* determine buffer space needed for message */
1804     nz = 0;
1805     for ( i=0; i<m; i++ ) {
1806       nz += ourlens[i];
1807     }
1808     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1809 
1810     /* receive message of column indices */
1811     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1812     MPI_Get_count(&status,MPI_INT,&maxnz);
1813     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1814   }
1815 
1816   /* loop over local rows, determining number of off-diagonal entries */
1817   PetscMemzero(offlens,m*sizeof(int));
1818   jj = 0;
1819   for ( i=0; i<m; i++ ) {
1820     for ( j=0; j<ourlens[i]; j++ ) {
1821       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1822       jj++;
1823     }
1824   }
1825 
1826   /* create our matrix */
1827   for ( i=0; i<m; i++ ) {
1828     ourlens[i] -= offlens[i];
1829   }
1830   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1831   A = *newmat;
1832   MatSetOption(A,MAT_COLUMNS_SORTED);
1833   for ( i=0; i<m; i++ ) {
1834     ourlens[i] += offlens[i];
1835   }
1836 
1837   if (!rank) {
1838     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1839 
1840     /* read in my part of the matrix numerical values  */
1841     nz = procsnz[0];
1842     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1843 
1844     /* insert into matrix */
1845     jj      = rstart;
1846     smycols = mycols;
1847     svals   = vals;
1848     for ( i=0; i<m; i++ ) {
1849       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1850       smycols += ourlens[i];
1851       svals   += ourlens[i];
1852       jj++;
1853     }
1854 
1855     /* read in other processors and ship out */
1856     for ( i=1; i<size; i++ ) {
1857       nz = procsnz[i];
1858       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1859       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1860     }
1861     PetscFree(procsnz);
1862   }
1863   else {
1864     /* receive numeric values */
1865     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1866 
1867     /* receive message of values */
1868     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1869     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1870     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1871 
1872     /* insert into matrix */
1873     jj      = rstart;
1874     smycols = mycols;
1875     svals   = vals;
1876     for ( i=0; i<m; i++ ) {
1877       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1878       smycols += ourlens[i];
1879       svals   += ourlens[i];
1880       jj++;
1881     }
1882   }
1883   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1884 
1885   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1886   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1887   return 0;
1888 }
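/*
   Usage sketch for this loader (illustrative only; the viewer creation call,
   its flags and the file name are assumptions about the caller and are not
   used in this file):

       Viewer viewer;
       Mat    A;
       int    ierr;

       ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);

   The matrix is read collectively over the viewer's communicator and is
   returned fully assembled.
*/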
1889