xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b8ec51381d66da6d50fd78b876e2ef1711ed7dd1)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.203 1997/05/03 22:43:34 curfman Exp curfman $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global matrix length.
14 */
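/*
   For example (illustrative values only): if the off-diagonal part of this
   processor's matrix uses global columns garray = {3,7,12}, then after this
   routine colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, and every other
   entry of colmap is 0, so colmap[col]-1 gives -1 for any column that does
   not appear in the off-diagonal part.
*/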
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
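/*
   Inserts (or, for ADD_VALUES, adds) a single value at (row,col) of the
   local diagonal block a, keeping the row sorted by column.  If the row has
   no free slot and new nonzeros are allowed, the row storage is enlarged by
   CHUNKSIZE entries and the existing data is copied over.
*/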
59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = aimax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66       for ( _i=0; _i<nrow; _i++ ) { \
67         if (rp[_i] > col1) break; \
68         if (rp[_i] == col1) { \
69           if (addv == ADD_VALUES) ap[_i] += value;   \
70           else                  ap[_i] = value; \
71           goto a_noinsert; \
72         } \
73       }  \
74       if (nonew == 1) goto a_noinsert; \
75       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
76       if (nrow >= rmax) { \
77         /* there is no extra room in row, therefore enlarge */ \
78         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
79         Scalar *new_a; \
80  \
81         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
82  \
83         /* malloc new storage space */ \
84         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
85         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
86         new_j   = (int *) (new_a + new_nz); \
87         new_i   = new_j + new_nz; \
88  \
89         /* copy over old data into new slots */ \
90         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
91         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
92         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
93         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
94         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
95                                                            len*sizeof(int)); \
96         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
97         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
98                                                            len*sizeof(Scalar));  \
99         /* free up old matrix storage */ \
100  \
101         PetscFree(a->a);  \
102         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
103         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
104         a->singlemalloc = 1; \
105  \
106         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
107         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
108         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
109         a->maxnz += CHUNKSIZE; \
110         a->reallocs++; \
111       } \
112       N = nrow++ - 1; a->nz++; \
113       /* shift up all the later entries in this row */ \
114       for ( ii=N; ii>=_i; ii-- ) { \
115         rp[ii+1] = rp[ii]; \
116         ap[ii+1] = ap[ii]; \
117       } \
118       rp[_i] = col1;  \
119       ap[_i] = value;  \
120       a_noinsert: ; \
121       ailen[row] = nrow; \
122 }
123 
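/* same as MatSetValues_SeqAIJ_A_Private above, but operating on the
   off-diagonal block b */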
124 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
125 { \
126  \
127     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
128     rmax = bimax[row]; nrow = bilen[row];  \
129     col1 = col - shift; \
130      \
131       for ( _i=0; _i<nrow; _i++ ) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
144         Scalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
150         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
151         new_j   = (int *) (new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
156         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int)); \
161         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
162         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(Scalar));  \
164         /* free up old matrix storage */ \
165  \
166         PetscFree(b->a);  \
167         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
168         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
169         b->singlemalloc = 1; \
170  \
171         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
172         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
173         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
174         b->maxnz += CHUNKSIZE; \
175         b->reallocs++; \
176       } \
177       N = nrow++ - 1; b->nz++; \
178       /* shift up all the later entries in this row */ \
179       for ( ii=N; ii>=_i; ii-- ) { \
180         rp[ii+1] = rp[ii]; \
181         ap[ii+1] = ap[ii]; \
182       } \
183       rp[_i] = col1;  \
184       ap[_i] = value;  \
185       b_noinsert: ; \
186       bilen[row] = nrow; \
187 }
188 
189 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIAIJ"
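/*
   Locally owned rows are inserted directly: columns in [cstart,cend) go into
   the diagonal block A, all other columns go into the off-diagonal block B
   (translating the global column number through colmap once the matrix has
   previously been assembled).  Rows owned by other processors are stashed
   and communicated during MatAssemblyBegin/End, unless the option
   MAT_IGNORE_OFF_PROC_ENTRIES (donotstash) was set.
*/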
192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   Scalar     value;
196   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197   int        cstart = aij->cstart, cend = aij->cend,row,col;
198   int        roworiented = aij->roworiented;
199 
200   /* Some variables required by the macros */
201   Mat        A = aij->A;
202   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
203   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
204   Scalar     *aa = a->a;
205 
206   Mat        B = aij->B;
207   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
208   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
209   Scalar     *ba = b->a;
210 
211   int        *rp,ii,nrow,_i,rmax, N, col1;
212   int        nonew = a->nonew,shift = a->indexshift;
213   Scalar     *ap;
214 
215   for ( i=0; i<m; i++ ) {
216 #if defined(PETSC_BOPT_g)
217     if (im[i] < 0) SETERRQ(1,0,"Negative row");
218     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
219 #endif
220     if (im[i] >= rstart && im[i] < rend) {
221       row = im[i] - rstart;
222       for ( j=0; j<n; j++ ) {
223         if (in[j] >= cstart && in[j] < cend){
224           col = in[j] - cstart;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         }
229 #if defined(PETSC_BOPT_g)
230         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
231         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238             col = aij->colmap[in[j]] - 1;
239             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
241               col =  in[j];
242             }
243           }
244           else col = in[j];
245           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
246           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
247           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
248         }
249       }
250     }
251     else {
252       if (roworiented && !aij->donotstash) {
253         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
254       }
255       else {
256         if (!aij->donotstash) {
257           row = im[i];
258           for ( j=0; j<n; j++ ) {
259             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
260           }
261         }
262       }
263     }
264   }
265   return 0;
266 }
267 
268 #undef __FUNC__
269 #define __FUNC__ "MatGetValues_MPIAIJ"
270 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
271 {
272   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
273   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
274   int        cstart = aij->cstart, cend = aij->cend,row,col;
275 
276   for ( i=0; i<m; i++ ) {
277     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
278     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
279     if (idxm[i] >= rstart && idxm[i] < rend) {
280       row = idxm[i] - rstart;
281       for ( j=0; j<n; j++ ) {
282         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
283         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
284         if (idxn[j] >= cstart && idxn[j] < cend){
285           col = idxn[j] - cstart;
286           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
287         }
288         else {
289           if (!aij->colmap) {
290             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
291           }
292           col = aij->colmap[idxn[j]] - 1;
293           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
294           else {
295             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
296           }
297         }
298       }
299     }
300     else {
301       SETERRQ(1,0,"Only local values currently supported");
302     }
303   }
304   return 0;
305 }
306 
307 #undef __FUNC__
308 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
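/*
   Sends the stashed off-processor values to their owning processors:
   count how many stashed entries go to each processor, use MPI_Allreduce
   to learn how many messages this processor will receive and the maximum
   message length, then post nonblocking receives and sends with three
   Scalars (row index, column index, value) per stashed entry.  The
   messages are unpacked in MatAssemblyEnd_MPIAIJ.
*/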
309 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
310 {
311   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
312   MPI_Comm    comm = mat->comm;
313   int         size = aij->size, *owners = aij->rowners;
314   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
315   MPI_Request *send_waits,*recv_waits;
316   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
317   InsertMode  addv;
318   Scalar      *rvalues,*svalues;
319 
320   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
321   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
322   if (addv == (ADD_VALUES|INSERT_VALUES)) {
323     SETERRQ(1,0,"Some processors inserted others added");
324   }
325   mat->insertmode = addv; /* in case this processor had no cache */
326 
327   /*  first count number of contributors to each processor */
328   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
329   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
330   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
331   for ( i=0; i<aij->stash.n; i++ ) {
332     idx = aij->stash.idx[i];
333     for ( j=0; j<size; j++ ) {
334       if (idx >= owners[j] && idx < owners[j+1]) {
335         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
336       }
337     }
338   }
339   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
340 
341   /* inform other processors of number of messages and max length*/
342   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
343   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
344   nreceives = work[rank];
345   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
346   nmax = work[rank];
347   PetscFree(work);
348 
349   /* post receives:
350        1) each message will consist of ordered triples
351      (global row index, global column index, value); the indices are
352      stored as Scalars to simplify the message passing.
353        2) since we don't know how long each individual message is, we
354      allocate the largest needed buffer for each receive. Potentially
355      this is a lot of wasted space.
356 
357 
358        This could be done better.
359   */
360   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
361   CHKPTRQ(rvalues);
362   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
363   CHKPTRQ(recv_waits);
364   for ( i=0; i<nreceives; i++ ) {
365     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
366               comm,recv_waits+i);
367   }
368 
369   /* do sends:
370       1) starts[i] gives the starting index in svalues for stuff going to
371          the ith processor
372   */
373   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
374   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
375   CHKPTRQ(send_waits);
376   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
377   starts[0] = 0;
378   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   for ( i=0; i<aij->stash.n; i++ ) {
380     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
381     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
382     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
383   }
384   PetscFree(owner);
385   starts[0] = 0;
386   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
387   count = 0;
388   for ( i=0; i<size; i++ ) {
389     if (procs[i]) {
390       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
391                 comm,send_waits+count++);
392     }
393   }
394   PetscFree(starts); PetscFree(nprocs);
395 
396   /* Free cache space */
397   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
398   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
399 
400   aij->svalues    = svalues;    aij->rvalues    = rvalues;
401   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
402   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
403   aij->rmax       = nmax;
404 
405   return 0;
406 }
407 extern int MatSetUpMultiply_MPIAIJ(Mat);
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
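/*
   Waits on the receives posted in MatAssemblyBegin_MPIAIJ, inserts the
   received (row,col,value) triples into A or B, waits on the outstanding
   sends, and assembles the two sequential blocks.  If any processor has
   disassembled, all processors disassemble so the matrix can be reassembled
   consistently; on the first final assembly the scatter context used by the
   matrix-vector products is set up via MatSetUpMultiply_MPIAIJ().
*/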
411 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
412 {
413   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
414   MPI_Status  *send_status,recv_status;
415   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
416   int         row,col,other_disassembled;
417   Scalar      *values,val;
418   InsertMode  addv = mat->insertmode;
419 
420   /*  wait on receives */
421   while (count) {
422     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
423     /* unpack receives into our local space */
424     values = aij->rvalues + 3*imdex*aij->rmax;
425     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
426     n = n/3;
427     for ( i=0; i<n; i++ ) {
428       row = (int) PetscReal(values[3*i]) - aij->rstart;
429       col = (int) PetscReal(values[3*i+1]);
430       val = values[3*i+2];
431       if (col >= aij->cstart && col < aij->cend) {
432         col -= aij->cstart;
433         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
434       }
435       else {
436         if (mat->was_assembled || mat->assembled) {
437           if (!aij->colmap) {
438             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
439           }
440           col = aij->colmap[col] - 1;
441           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
442             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
443             col = (int) PetscReal(values[3*i+1]);
444           }
445         }
446         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
447       }
448     }
449     count--;
450   }
451   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
452 
453   /* wait on sends */
454   if (aij->nsends) {
455     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
456     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
457     PetscFree(send_status);
458   }
459   PetscFree(aij->send_waits); PetscFree(aij->svalues);
460 
461   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
462   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
463 
464   /* determine if any processor has disassembled; if so, we must
465      also disassemble ourselves so that we may reassemble. */
466   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
467   if (mat->was_assembled && !other_disassembled) {
468     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
469   }
470 
471   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
472     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
473   }
474   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
475   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
476 
477   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
478   return 0;
479 }
480 
481 #undef __FUNC__
482 #define __FUNC__ "MatZeroEntries_MPIAIJ"
483 int MatZeroEntries_MPIAIJ(Mat A)
484 {
485   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
486   int        ierr;
487   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
488   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
489   return 0;
490 }
491 
492 /* the code does not do the diagonal entries correctly unless the
493    matrix is square and the column and row ownerships are identical.
494    This is a BUG. The only way to fix it seems to be to access
495    aij->A and aij->B directly and not through the MatZeroRows()
496    routine.
497 */
498 #undef __FUNC__
499 #define __FUNC__ "MatZeroRows_MPIAIJ"
500 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
501 {
502   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
503   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
504   int            *procs,*nprocs,j,found,idx,nsends,*work;
505   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
506   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
507   int            *lens,imdex,*lrows,*values;
508   MPI_Comm       comm = A->comm;
509   MPI_Request    *send_waits,*recv_waits;
510   MPI_Status     recv_status,*send_status;
511   IS             istmp;
512 
513   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
514   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
515 
516   /*  first count number of contributors to each processor */
517   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
518   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
519   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
520   for ( i=0; i<N; i++ ) {
521     idx = rows[i];
522     found = 0;
523     for ( j=0; j<size; j++ ) {
524       if (idx >= owners[j] && idx < owners[j+1]) {
525         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
526       }
527     }
528     if (!found) SETERRQ(1,0,"Index out of range");
529   }
530   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
531 
532   /* inform other processors of number of messages and max length*/
533   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
534   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
535   nrecvs = work[rank];
536   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
537   nmax = work[rank];
538   PetscFree(work);
539 
540   /* post receives:   */
541   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
542   CHKPTRQ(rvalues);
543   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
544   CHKPTRQ(recv_waits);
545   for ( i=0; i<nrecvs; i++ ) {
546     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
547   }
548 
549   /* do sends:
550       1) starts[i] gives the starting index in svalues for stuff going to
551          the ith processor
552   */
553   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
554   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
555   CHKPTRQ(send_waits);
556   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
557   starts[0] = 0;
558   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
559   for ( i=0; i<N; i++ ) {
560     svalues[starts[owner[i]]++] = rows[i];
561   }
562   ISRestoreIndices(is,&rows);
563 
564   starts[0] = 0;
565   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
566   count = 0;
567   for ( i=0; i<size; i++ ) {
568     if (procs[i]) {
569       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
570     }
571   }
572   PetscFree(starts);
573 
574   base = owners[rank];
575 
576   /*  wait on receives */
577   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
578   source = lens + nrecvs;
579   count  = nrecvs; slen = 0;
580   while (count) {
581     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
582     /* unpack receives into our local space */
583     MPI_Get_count(&recv_status,MPI_INT,&n);
584     source[imdex]  = recv_status.MPI_SOURCE;
585     lens[imdex]  = n;
586     slen += n;
587     count--;
588   }
589   PetscFree(recv_waits);
590 
591   /* move the data into the send scatter */
592   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
593   count = 0;
594   for ( i=0; i<nrecvs; i++ ) {
595     values = rvalues + i*nmax;
596     for ( j=0; j<lens[i]; j++ ) {
597       lrows[count++] = values[j] - base;
598     }
599   }
600   PetscFree(rvalues); PetscFree(lens);
601   PetscFree(owner); PetscFree(nprocs);
602 
603   /* actually zap the local rows */
604   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
605   PLogObjectParent(A,istmp);
606   PetscFree(lrows);
607   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
608   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
609   ierr = ISDestroy(istmp); CHKERRQ(ierr);
610 
611   /* wait on sends */
612   if (nsends) {
613     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
614     CHKPTRQ(send_status);
615     MPI_Waitall(nsends,send_waits,send_status);
616     PetscFree(send_status);
617   }
618   PetscFree(send_waits); PetscFree(svalues);
619 
620   return 0;
621 }
622 
623 #undef __FUNC__
624 #define __FUNC__ "MatMult_MPIAIJ"
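/*
   Computes yy = A*xx as yy = (diagonal block)*xx + (off-diagonal block)*lvec,
   where lvec holds the off-processor (ghost) entries of xx gathered with the
   Mvctx scatter; the scatter is overlapped with the local multiply.
*/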
625 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
628   int        ierr,nt;
629 
630   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
631   if (nt != a->n) {
632     SETERRQ(1,0,"Incompatible partition of A and xx");
633   }
634   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
635   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
636   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
637   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
638   return 0;
639 }
640 
641 #undef __FUNC__
642 #define __FUNC__ "MatMultAdd_MPIAIJ"
643 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
644 {
645   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
646   int        ierr;
647   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
648   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
649   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
650   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
651   return 0;
652 }
653 
654 #undef __FUNC__
655 #define __FUNC__ "MatMultTrans_MPIAIJ"
656 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
657 {
658   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
659   int        ierr;
660 
661   /* do nondiagonal part */
662   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
663   /* send it on its way */
664   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
665   /* do local part */
666   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
667   /* receive remote parts: note this assumes the values are not actually */
668   /* inserted in yy until the next line, which is true for my implementation */
669   /* but may not always be true. */
670   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
671   return 0;
672 }
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
676 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
679   int        ierr;
680 
681   /* do nondiagonal part */
682   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
683   /* send it on its way */
684   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
685   /* do local part */
686   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
687   /* receive remote parts: note this assumes the values are not actually */
688   /* inserted in zz until the next line, which is true for my implementation */
689   /* but may not always be true. */
690   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
691   return 0;
692 }
693 
694 /*
695   This only works correctly for square matrices where the subblock A->A is the
696    diagonal block
697 */
698 #undef __FUNC__
699 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
700 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
701 {
702   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
703   if (a->M != a->N)
704     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
705   if (a->rstart != a->cstart || a->rend != a->cend)
706     SETERRQ(1,0,"row partition must equal col partition");
707   return MatGetDiagonal(a->A,v);
708 }
709 
710 #undef __FUNC__
711 #define __FUNC__ "MatScale_MPIAIJ"
712 int MatScale_MPIAIJ(Scalar *aa,Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
715   int        ierr;
716   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
717   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
718   return 0;
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
723 int MatDestroy_MPIAIJ(PetscObject obj)
724 {
725   Mat        mat = (Mat) obj;
726   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
727   int        ierr;
728 #if defined(PETSC_LOG)
729   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
730 #endif
731   PetscFree(aij->rowners);
732   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
733   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
734   if (aij->colmap) PetscFree(aij->colmap);
735   if (aij->garray) PetscFree(aij->garray);
736   if (aij->lvec)   VecDestroy(aij->lvec);
737   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
738   if (aij->rowvalues) PetscFree(aij->rowvalues);
739   PetscFree(aij);
740   if (mat->mapping) {
741     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
742   }
743   PLogObjectDestroy(mat);
744   PetscHeaderDestroy(mat);
745   return 0;
746 }
747 
748 #undef __FUNC__
749 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
750 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
751 {
752   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
753   int         ierr;
754 
755   if (aij->size == 1) {
756     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
757   }
758   else SETERRQ(1,0,"Only uniprocessor output supported");
759   return 0;
760 }
761 
762 #undef __FUNC__
763 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
764 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
765 {
766   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
767   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
768   int         ierr, format,shift = C->indexshift,rank;
769   FILE        *fd;
770   ViewerType  vtype;
771 
772   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
773   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
774     ierr = ViewerGetFormat(viewer,&format);
775     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
776       MatInfo info;
777       int     flg;
778       MPI_Comm_rank(mat->comm,&rank);
779       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
780       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
781       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
782       PetscSequentialPhaseBegin(mat->comm,1);
783       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
784          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
785       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
786          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
787       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
788       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
789       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
790       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
791       fflush(fd);
792       PetscSequentialPhaseEnd(mat->comm,1);
793       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
794       return 0;
795     }
796     else if (format == VIEWER_FORMAT_ASCII_INFO) {
797       return 0;
798     }
799   }
800 
801   if (vtype == DRAW_VIEWER) {
802     Draw       draw;
803     PetscTruth isnull;
804     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
805     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
806   }
807 
808   if (vtype == ASCII_FILE_VIEWER) {
809     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
810     PetscSequentialPhaseBegin(mat->comm,1);
811     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
812            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
813            aij->cend);
814     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
815     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
816     fflush(fd);
817     PetscSequentialPhaseEnd(mat->comm,1);
818   }
819   else {
820     int size = aij->size;
821     rank = aij->rank;
822     if (size == 1) {
823       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
824     }
825     else {
826       /* assemble the entire matrix onto first processor. */
827       Mat         A;
828       Mat_SeqAIJ *Aloc;
829       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
830       Scalar      *a;
831 
832       if (!rank) {
833         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
834                CHKERRQ(ierr);
835       }
836       else {
837         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
838                CHKERRQ(ierr);
839       }
840       PLogObjectParent(mat,A);
841 
842       /* copy over the A part */
843       Aloc = (Mat_SeqAIJ*) aij->A->data;
844       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
845       row = aij->rstart;
846       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
847       for ( i=0; i<m; i++ ) {
848         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
849         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
850       }
851       aj = Aloc->j;
852       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
853 
854       /* copy over the B part */
855       Aloc = (Mat_SeqAIJ*) aij->B->data;
856       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
857       row = aij->rstart;
858       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
859       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
860       for ( i=0; i<m; i++ ) {
861         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
862         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
863       }
864       PetscFree(ct);
865       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
866       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
867       if (!rank) {
868         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
869       }
870       ierr = MatDestroy(A); CHKERRQ(ierr);
871     }
872   }
873   return 0;
874 }
875 
876 #undef __FUNC__
877 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
878 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
879 {
880   Mat         mat = (Mat) obj;
881   int         ierr;
882   ViewerType  vtype;
883 
884   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
885   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
886       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
887     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
888   }
889   else if (vtype == BINARY_FILE_VIEWER) {
890     return MatView_MPIAIJ_Binary(mat,viewer);
891   }
892   return 0;
893 }
894 
895 /*
896     This has to provide several versions.
897 
898      2) a) use only local smoothing, updating outer values only once.
899         b) local smoothing, updating outer values each inner iteration.
900      3) coloring, updating outer values between colors.
901 */
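/*
   Each local sweep below applies the usual SOR update
       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j!=i} a_ij*x_j)/(a_ii + fshift),
   where columns owned by other processors take their x values from the ghost
   vector lvec, which is refreshed only by the scatter at the start of the
   call (this description assumes SPARSEDENSEMDOT subtracts the sparse row
   dot product from sum).
*/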
902 #undef __FUNC__
903 #define __FUNC__ "MatRelax_MPIAIJ"
904 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
905                            double fshift,int its,Vec xx)
906 {
907   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
908   Mat        AA = mat->A, BB = mat->B;
909   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
910   Scalar     *b,*x,*xs,*ls,d,*v,sum;
911   int        ierr,*idx, *diag;
912   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
913 
914   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
915   xs = x + shift; /* shift by one for index start of 1 */
916   ls = ls + shift;
917   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
918   diag = A->diag;
919   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
920     if (flag & SOR_ZERO_INITIAL_GUESS) {
921       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
922     }
923     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
924     CHKERRQ(ierr);
925     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
926     CHKERRQ(ierr);
927     while (its--) {
928       /* go down through the rows */
929       for ( i=0; i<m; i++ ) {
930         n    = A->i[i+1] - A->i[i];
931         idx  = A->j + A->i[i] + shift;
932         v    = A->a + A->i[i] + shift;
933         sum  = b[i];
934         SPARSEDENSEMDOT(sum,xs,v,idx,n);
935         d    = fshift + A->a[diag[i]+shift];
936         n    = B->i[i+1] - B->i[i];
937         idx  = B->j + B->i[i] + shift;
938         v    = B->a + B->i[i] + shift;
939         SPARSEDENSEMDOT(sum,ls,v,idx,n);
940         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
941       }
942       /* come up through the rows */
943       for ( i=m-1; i>-1; i-- ) {
944         n    = A->i[i+1] - A->i[i];
945         idx  = A->j + A->i[i] + shift;
946         v    = A->a + A->i[i] + shift;
947         sum  = b[i];
948         SPARSEDENSEMDOT(sum,xs,v,idx,n);
949         d    = fshift + A->a[diag[i]+shift];
950         n    = B->i[i+1] - B->i[i];
951         idx  = B->j + B->i[i] + shift;
952         v    = B->a + B->i[i] + shift;
953         SPARSEDENSEMDOT(sum,ls,v,idx,n);
954         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
955       }
956     }
957   }
958   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
959     if (flag & SOR_ZERO_INITIAL_GUESS) {
960       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
961     }
962     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
963     CHKERRQ(ierr);
964     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
965     CHKERRQ(ierr);
966     while (its--) {
967       for ( i=0; i<m; i++ ) {
968         n    = A->i[i+1] - A->i[i];
969         idx  = A->j + A->i[i] + shift;
970         v    = A->a + A->i[i] + shift;
971         sum  = b[i];
972         SPARSEDENSEMDOT(sum,xs,v,idx,n);
973         d    = fshift + A->a[diag[i]+shift];
974         n    = B->i[i+1] - B->i[i];
975         idx  = B->j + B->i[i] + shift;
976         v    = B->a + B->i[i] + shift;
977         SPARSEDENSEMDOT(sum,ls,v,idx,n);
978         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
979       }
980     }
981   }
982   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
983     if (flag & SOR_ZERO_INITIAL_GUESS) {
984       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
985     }
986     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
987                             mat->Mvctx); CHKERRQ(ierr);
988     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
989                             mat->Mvctx); CHKERRQ(ierr);
990     while (its--) {
991       for ( i=m-1; i>-1; i-- ) {
992         n    = A->i[i+1] - A->i[i];
993         idx  = A->j + A->i[i] + shift;
994         v    = A->a + A->i[i] + shift;
995         sum  = b[i];
996         SPARSEDENSEMDOT(sum,xs,v,idx,n);
997         d    = fshift + A->a[diag[i]+shift];
998         n    = B->i[i+1] - B->i[i];
999         idx  = B->j + B->i[i] + shift;
1000         v    = B->a + B->i[i] + shift;
1001         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1002         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1003       }
1004     }
1005   }
1006   else {
1007     SETERRQ(1,0,"Option not supported");
1008   }
1009   return 0;
1010 }
1011 
1012 #undef __FUNC__
1013 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1014 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1015 {
1016   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1017   Mat        A = mat->A, B = mat->B;
1018   int        ierr;
1019   double     isend[5], irecv[5];
1020 
1021   info->rows_global    = (double)mat->M;
1022   info->columns_global = (double)mat->N;
1023   info->rows_local     = (double)mat->m;
1024   info->columns_local  = (double)mat->N;
1025   info->block_size     = 1.0;
1026   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1027   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1028   isend[3] = info->memory;  isend[4] = info->mallocs;
1029   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1030   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1031   isend[3] += info->memory;  isend[4] += info->mallocs;
1032   if (flag == MAT_LOCAL) {
1033     info->nz_used      = isend[0];
1034     info->nz_allocated = isend[1];
1035     info->nz_unneeded  = isend[2];
1036     info->memory       = isend[3];
1037     info->mallocs      = isend[4];
1038   } else if (flag == MAT_GLOBAL_MAX) {
1039     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1040     info->nz_used      = irecv[0];
1041     info->nz_allocated = irecv[1];
1042     info->nz_unneeded  = irecv[2];
1043     info->memory       = irecv[3];
1044     info->mallocs      = irecv[4];
1045   } else if (flag == MAT_GLOBAL_SUM) {
1046     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1047     info->nz_used      = irecv[0];
1048     info->nz_allocated = irecv[1];
1049     info->nz_unneeded  = irecv[2];
1050     info->memory       = irecv[3];
1051     info->mallocs      = irecv[4];
1052   }
1053   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1054   info->fill_ratio_needed = 0;
1055   info->factor_mallocs    = 0;
1056 
1057   return 0;
1058 }
1059 
1060 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1061 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1062 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1063 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1064 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1065 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1066 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1067 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1068 
1069 #undef __FUNC__
1070 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1071 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1072 {
1073   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1074 
1075   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1076       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1077       op == MAT_COLUMNS_UNSORTED ||
1078       op == MAT_COLUMNS_SORTED ||
1079       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1080       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1081         MatSetOption(a->A,op);
1082         MatSetOption(a->B,op);
1083   } else if (op == MAT_ROW_ORIENTED) {
1084     a->roworiented = 1;
1085     MatSetOption(a->A,op);
1086     MatSetOption(a->B,op);
1087   } else if (op == MAT_ROWS_SORTED ||
1088              op == MAT_ROWS_UNSORTED ||
1089              op == MAT_SYMMETRIC ||
1090              op == MAT_STRUCTURALLY_SYMMETRIC ||
1091              op == MAT_YES_NEW_DIAGONALS)
1092     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1093   else if (op == MAT_COLUMN_ORIENTED) {
1094     a->roworiented = 0;
1095     MatSetOption(a->A,op);
1096     MatSetOption(a->B,op);
1097   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1098     a->donotstash = 1;
1099   } else if (op == MAT_NO_NEW_DIAGONALS)
1100     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1101   else
1102     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1103   return 0;
1104 }
1105 
1106 #undef __FUNC__
1107 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1108 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1109 {
1110   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1111   *m = mat->M; *n = mat->N;
1112   return 0;
1113 }
1114 
1115 #undef __FUNC__
1116 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1117 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1118 {
1119   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1120   *m = mat->m; *n = mat->N;
1121   return 0;
1122 }
1123 
1124 #undef __FUNC__
1125 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1127 {
1128   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1129   *m = mat->rstart; *n = mat->rend;
1130   return 0;
1131 }
1132 
1133 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1134 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1135 
1136 #undef __FUNC__
1137 #define __FUNC__ "MatGetRow_MPIAIJ"
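/*
   Returns one locally owned row in global column numbering by merging the
   (already sorted) corresponding rows of the diagonal block A and the
   off-diagonal block B; off-diagonal column indices are mapped back to
   global numbers through garray.
*/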
1138 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1139 {
1140   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1141   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1142   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1143   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1144   int        *cmap, *idx_p;
1145 
1146   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1147   mat->getrowactive = PETSC_TRUE;
1148 
1149   if (!mat->rowvalues && (idx || v)) {
1150     /*
1151         allocate enough space to hold information from the longest row.
1152     */
1153     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1154     int     max = 1,m = mat->m,tmp;
1155     for ( i=0; i<m; i++ ) {
1156       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1157       if (max < tmp) { max = tmp; }
1158     }
1159     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1160     CHKPTRQ(mat->rowvalues);
1161     mat->rowindices = (int *) (mat->rowvalues + max);
1162   }
1163 
1164   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1165   lrow = row - rstart;
1166 
1167   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1168   if (!v)   {pvA = 0; pvB = 0;}
1169   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1170   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1171   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1172   nztot = nzA + nzB;
1173 
1174   cmap  = mat->garray;
1175   if (v  || idx) {
1176     if (nztot) {
1177       /* Sort by increasing column numbers, assuming A and B already sorted */
1178       int imark = -1;
1179       if (v) {
1180         *v = v_p = mat->rowvalues;
1181         for ( i=0; i<nzB; i++ ) {
1182           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1183           else break;
1184         }
1185         imark = i;
1186         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1187         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1188       }
1189       if (idx) {
1190         *idx = idx_p = mat->rowindices;
1191         if (imark > -1) {
1192           for ( i=0; i<imark; i++ ) {
1193             idx_p[i] = cmap[cworkB[i]];
1194           }
1195         } else {
1196           for ( i=0; i<nzB; i++ ) {
1197             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1198             else break;
1199           }
1200           imark = i;
1201         }
1202         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1203         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1204       }
1205     }
1206     else {
1207       if (idx) *idx = 0;
1208       if (v)   *v   = 0;
1209     }
1210   }
1211   *nz = nztot;
1212   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1213   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1214   return 0;
1215 }
1216 
1217 #undef __FUNC__
1218 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1219 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1220 {
1221   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1222   if (aij->getrowactive == PETSC_FALSE) {
1223     SETERRQ(1,0,"MatGetRow not called");
1224   }
1225   aij->getrowactive = PETSC_FALSE;
1226   return 0;
1227 }
1228 
1229 #undef __FUNC__
1230 #define __FUNC__ "MatNorm_MPIAIJ"
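/*
   Frobenius norm: sum of squares of the local entries, MPI_SUM reduced, then
   the square root.  1-norm: maximum global column sum, accumulated in a
   length-N work array and MPI_SUM reduced (not scalable in N).  Infinity
   norm: maximum local row sum, MPI_MAX reduced.  The 2-norm is not supported.
*/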
1231 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1232 {
1233   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1234   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1235   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1236   double     sum = 0.0;
1237   Scalar     *v;
1238 
1239   if (aij->size == 1) {
1240     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1241   } else {
1242     if (type == NORM_FROBENIUS) {
1243       v = amat->a;
1244       for (i=0; i<amat->nz; i++ ) {
1245 #if defined(PETSC_COMPLEX)
1246         sum += real(conj(*v)*(*v)); v++;
1247 #else
1248         sum += (*v)*(*v); v++;
1249 #endif
1250       }
1251       v = bmat->a;
1252       for (i=0; i<bmat->nz; i++ ) {
1253 #if defined(PETSC_COMPLEX)
1254         sum += real(conj(*v)*(*v)); v++;
1255 #else
1256         sum += (*v)*(*v); v++;
1257 #endif
1258       }
1259       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1260       *norm = sqrt(*norm);
1261     }
1262     else if (type == NORM_1) { /* max column norm */
1263       double *tmp, *tmp2;
1264       int    *jj, *garray = aij->garray;
1265       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1266       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1267       PetscMemzero(tmp,aij->N*sizeof(double));
1268       *norm = 0.0;
1269       v = amat->a; jj = amat->j;
1270       for ( j=0; j<amat->nz; j++ ) {
1271         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1272       }
1273       v = bmat->a; jj = bmat->j;
1274       for ( j=0; j<bmat->nz; j++ ) {
1275         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1276       }
1277       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1278       for ( j=0; j<aij->N; j++ ) {
1279         if (tmp2[j] > *norm) *norm = tmp2[j];
1280       }
1281       PetscFree(tmp); PetscFree(tmp2);
1282     }
1283     else if (type == NORM_INFINITY) { /* max row norm */
1284       double ntemp = 0.0;
1285       for ( j=0; j<amat->m; j++ ) {
1286         v = amat->a + amat->i[j] + shift;
1287         sum = 0.0;
1288         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1289           sum += PetscAbsScalar(*v); v++;
1290         }
1291         v = bmat->a + bmat->i[j] + shift;
1292         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1293           sum += PetscAbsScalar(*v); v++;
1294         }
1295         if (sum > ntemp) ntemp = sum;
1296       }
1297       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1298     }
1299     else {
1300       SETERRQ(1,0,"No support for two norm");
1301     }
1302   }
1303   return 0;
1304 }
1305 
1306 #undef __FUNC__
1307 #define __FUNC__ "MatTranspose_MPIAIJ"
1308 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1309 {
1310   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1311   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1312   int        ierr,shift = Aloc->indexshift;
1313   Mat        B;
1314   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1315   Scalar     *array;
1316 
1317   if (matout == PETSC_NULL && M != N)
1318     SETERRQ(1,0,"Square matrix only for in-place");
1319   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1320          PETSC_NULL,&B); CHKERRQ(ierr);
1321 
1322   /* copy over the A part */
1323   Aloc = (Mat_SeqAIJ*) a->A->data;
1324   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1325   row = a->rstart;
1326   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1327   for ( i=0; i<m; i++ ) {
1328     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1329     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1330   }
1331   aj = Aloc->j;
1332   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1333 
1334   /* copy over the B part */
1335   Aloc = (Mat_SeqAIJ*) a->B->data;
1336   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1337   row = a->rstart;
1338   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1339   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1340   for ( i=0; i<m; i++ ) {
1341     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1342     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1343   }
1344   PetscFree(ct);
1345   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1346   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1347   if (matout != PETSC_NULL) {
1348     *matout = B;
1349   } else {
1350     /* This isn't really an in-place transpose, but we free the data structures from a */
1351     PetscFree(a->rowners);
1352     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1353     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1354     if (a->colmap) PetscFree(a->colmap);
1355     if (a->garray) PetscFree(a->garray);
1356     if (a->lvec) VecDestroy(a->lvec);
1357     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1358     PetscFree(a);
1359     PetscMemcpy(A,B,sizeof(struct _Mat));
1360     PetscHeaderDestroy(B);
1361   }
1362   return 0;
1363 }
1364 
1365 #undef __FUNC__
1366 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1367 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1368 {
1369   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1370   Mat a = aij->A, b = aij->B;
1371   int ierr,s1,s2,s3;
1372 
1373   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1374   if (rr) {
1375     s3 = aij->n;
1376     VecGetLocalSize_Fast(rr,s1);
1377     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1378     /* Overlap communication with computation. */
1379     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1380   }
1381   if (ll) {
1382     VecGetLocalSize_Fast(ll,s1);
1383     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1384     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1385   }
1386   /* scale  the diagonal block */
1387   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1388 
1389   if (rr) {
1390     /* Do a scatter end and then right scale the off-diagonal block */
1391     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1392     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1393   }
1394 
1395   return 0;
1396 }
1397 
1398 
1399 extern int MatPrintHelp_SeqAIJ(Mat);
1400 #undef __FUNC__
1401 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1402 int MatPrintHelp_MPIAIJ(Mat A)
1403 {
1404   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1405 
1406   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1407   else return 0;
1408 }
1409 
1410 #undef __FUNC__
1411 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1412 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1413 {
1414   *bs = 1;
1415   return 0;
1416 }
1417 #undef __FUNC__
1418 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1419 int MatSetUnfactored_MPIAIJ(Mat A)
1420 {
1421   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1422   int        ierr;
1423   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1424   return 0;
1425 }
1426 
1427 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1428 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1429 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1430 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1431 /* -------------------------------------------------------------------*/
1432 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1433        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1434        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1435        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1436        0,0,
1437        0,0,
1438        0,0,
1439        MatRelax_MPIAIJ,
1440        MatTranspose_MPIAIJ,
1441        MatGetInfo_MPIAIJ,0,
1442        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1443        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1444        0,
1445        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1446        0,0,0,0,
1447        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1448        0,0,
1449        0,0,MatConvertSameType_MPIAIJ,0,0,
1450        0,0,0,
1451        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1452        MatPrintHelp_MPIAIJ,
1453        MatScale_MPIAIJ,0,0,0,
1454        MatGetBlockSize_MPIAIJ,0,0,0,0,
1455        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1456 
1457 
1458 #undef __FUNC__
1459 #define __FUNC__ "MatCreateMPIAIJ"
1460 /*@C
1461    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1462    (the default parallel PETSc format).  For good matrix assembly performance
1463    the user should preallocate the matrix storage by setting the parameters
1464    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1465    performance can be increased by more than a factor of 50.
1466 
1467    Input Parameters:
1468 .  comm - MPI communicator
1469 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1470            This value should be the same as the local size used in creating the
1471            y vector for the matrix-vector product y = Ax.
1472 .  n - This value should be the same as the local size used in creating the
1473        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
1474        calculated if N is given).  For square matrices n is almost always m.
1475 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1476 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1477 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1478            (same for all local rows)
1479 .  d_nnz - array containing the number of nonzeros in the various rows of the
1480            diagonal portion of the local submatrix (possibly different for each row)
1481            or PETSC_NULL. You must leave room for the diagonal entry even if
1482            it is zero.
1483 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1484            submatrix (same for all local rows).
1485 .  o_nnz - array containing the number of nonzeros in the various rows of the
1486            off-diagonal portion of the local submatrix (possibly different for
1487            each row) or PETSC_NULL.
1488 
1489    Output Parameter:
1490 .  A - the matrix
1491 
1492    Notes:
1493    The AIJ format (also called the Yale sparse matrix format or
1494    compressed row storage) is fully compatible with standard Fortran 77
1495    storage.  That is, the stored row and column indices can begin at
1496    either one (as in Fortran) or zero.  See the users manual for details.
1497 
1498    The user MUST specify either the local or global matrix dimensions
1499    (possibly both).
1500 
1501    By default, this format uses inodes (identical nodes) when possible.
1502    We search for consecutive rows with the same nonzero structure, thereby
1503    reusing matrix information to achieve increased efficiency.
1504 
1505    Options Database Keys:
1506 $    -mat_aij_no_inode  - Do not use inodes
1507 $    -mat_aij_inode_limit <limit> - Set inode limit.
1508 $        (max limit=5)
1509 $    -mat_aij_oneindex - Internally use indexing starting at 1
1510 $        rather than 0.  Note: When calling MatSetValues(),
1511 $        the user still MUST index entries starting at 0!
1512 
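   For example, the inode limit can be lowered at run time with a command line
   such as the following (the program name myprog is only illustrative):
$      mpirun -np 4 myprog -mat_aij_inode_limit 4
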
1513    Storage Information:
1514    For a square global matrix we define each processor's diagonal portion
1515    to be its local rows and the corresponding columns (a square submatrix);
1516    each processor's off-diagonal portion encompasses the remainder of the
1517    local matrix (a rectangular submatrix).
1518 
1519    The user can specify preallocated storage for the diagonal part of
1520    the local submatrix with either d_nz or d_nnz (not both).  Set
1521    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1522    memory allocation.  Likewise, specify preallocated storage for the
1523    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1524 
1525    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1526    the figure below we depict these three local rows and all columns (0-11).
1527 
1528 $          0 1 2 3 4 5 6 7 8 9 10 11
1529 $         -------------------
1530 $  row 3  |  o o o d d d o o o o o o
1531 $  row 4  |  o o o d d d o o o o o o
1532 $  row 5  |  o o o d d d o o o o o o
1533 $         -------------------
1534 $
1535 
1536    Thus, any entries in the d locations are stored in the d (diagonal)
1537    submatrix, and any entries in the o locations are stored in the
1538    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1539    stored simply in the MATSEQAIJ format for compressed row storage.
1540 
1541    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1542    and o_nz should indicate the number of nonzeros per row in the o matrix.
1543    In general, for PDE problems in which most nonzeros are near the diagonal,
1544    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1545    or you will get TERRIBLE performance; see the users' manual chapter on
1546    matrices.
1547 
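   Example Usage:
   The sketch below is illustrative only: the per-row nonzero estimates of 5
   (diagonal block) and 2 (off-diagonal block) are problem dependent, and M and N
   are global dimensions chosen by the user.  It creates a parallel AIJ matrix,
   lets PETSc decide the local sizes, and assembles it after entries are inserted:

$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$      /* ... insert entries with MatSetValues(), one or more times ... */
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
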
1548 .keywords: matrix, aij, compressed row, sparse, parallel
1549 
1550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1551 @*/
1552 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1553                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1554 {
1555   Mat          B;
1556   Mat_MPIAIJ   *b;
1557   int          ierr, i,sum[2],work[2],size;
1558 
1559   *A = 0;
1560   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1561   PLogObjectCreate(B);
1562   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1563   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1564   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1565   MPI_Comm_size(comm,&size);
1566   if (size == 1) {
1567     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1568     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1569     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1570     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1571     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1572     B->ops.solve             = MatSolve_MPIAIJ;
1573     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1574     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1575     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1576     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1577   }
1578   B->destroy    = MatDestroy_MPIAIJ;
1579   B->view       = MatView_MPIAIJ;
1580   B->factor     = 0;
1581   B->assembled  = PETSC_FALSE;
1582   B->mapping    = 0;
1583 
1584   B->insertmode = NOT_SET_VALUES;
1585   MPI_Comm_rank(comm,&b->rank);
1586   MPI_Comm_size(comm,&b->size);
1587 
1588   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1589     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1590 
1591   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1592     work[0] = m; work[1] = n;
1593     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1594     if (M == PETSC_DECIDE) M = sum[0];
1595     if (N == PETSC_DECIDE) N = sum[1];
1596   }
1597   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1598   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1599   b->m = m; B->m = m;
1600   b->n = n; B->n = n;
1601   b->N = N; B->N = N;
1602   b->M = M; B->M = M;
1603 
1604   /* build local table of row and column ownerships */
1605   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1606   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1607   b->cowners = b->rowners + b->size + 2;
1608   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1609   b->rowners[0] = 0;
1610   for ( i=2; i<=b->size; i++ ) {
1611     b->rowners[i] += b->rowners[i-1];
1612   }
1613   b->rstart = b->rowners[b->rank];
1614   b->rend   = b->rowners[b->rank+1];
1615   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1616   b->cowners[0] = 0;
1617   for ( i=2; i<=b->size; i++ ) {
1618     b->cowners[i] += b->cowners[i-1];
1619   }
1620   b->cstart = b->cowners[b->rank];
1621   b->cend   = b->cowners[b->rank+1];
1622 
1623   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1624   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1625   PLogObjectParent(B,b->A);
1626   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1627   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1628   PLogObjectParent(B,b->B);
1629 
1630   /* build cache (stash) for matrix entries destined for rows owned by other processors */
1631   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1632   b->donotstash  = 0;
1633   b->colmap      = 0;
1634   b->garray      = 0;
1635   b->roworiented = 1;
1636 
1637   /* stuff used for matrix-vector multiply */
1638   b->lvec      = 0;
1639   b->Mvctx     = 0;
1640 
1641   /* stuff for MatGetRow() */
1642   b->rowindices   = 0;
1643   b->rowvalues    = 0;
1644   b->getrowactive = PETSC_FALSE;
1645 
1646   *A = B;
1647   return 0;
1648 }
1649 
1650 #undef __FUNC__
1651 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1652 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1653 {
1654   Mat        mat;
1655   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1656   int        ierr, len=0, flg;
1657 
1658   *newmat       = 0;
1659   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1660   PLogObjectCreate(mat);
1661   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1662   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1663   mat->destroy    = MatDestroy_MPIAIJ;
1664   mat->view       = MatView_MPIAIJ;
1665   mat->factor     = matin->factor;
1666   mat->assembled  = PETSC_TRUE;
1667 
1668   a->m = mat->m   = oldmat->m;
1669   a->n = mat->n   = oldmat->n;
1670   a->M = mat->M   = oldmat->M;
1671   a->N = mat->N   = oldmat->N;
1672 
1673   a->rstart       = oldmat->rstart;
1674   a->rend         = oldmat->rend;
1675   a->cstart       = oldmat->cstart;
1676   a->cend         = oldmat->cend;
1677   a->size         = oldmat->size;
1678   a->rank         = oldmat->rank;
1679   mat->insertmode = NOT_SET_VALUES;
1680   a->rowvalues    = 0;
1681   a->getrowactive = PETSC_FALSE;
1682 
1683   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1684   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1685   a->cowners = a->rowners + a->size + 2;
1686   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1687   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1688   if (oldmat->colmap) {
1689     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1690     PLogObjectMemory(mat,(a->N)*sizeof(int));
1691     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1692   } else a->colmap = 0;
1693   if (oldmat->garray) {
1694     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1695     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1696     PLogObjectMemory(mat,len*sizeof(int));
1697     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1698   } else a->garray = 0;
1699 
1700   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1701   PLogObjectParent(mat,a->lvec);
1702   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1703   PLogObjectParent(mat,a->Mvctx);
1704   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1705   PLogObjectParent(mat,a->A);
1706   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1707   PLogObjectParent(mat,a->B);
1708   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1709   if (flg) {
1710     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1711   }
1712   *newmat = mat;
1713   return 0;
1714 }
1715 
1716 #include "sys.h"
1717 
1718 #undef __FUNC__
1719 #define __FUNC__ "MatLoad_MPIAIJ"
1720 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1721 {
1722   Mat          A;
1723   int          i, nz, ierr, j,rstart, rend, fd;
1724   Scalar       *vals,*svals;
1725   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1726   MPI_Status   status;
1727   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1728   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1729   int          tag = ((PetscObject)viewer)->tag;
1730 
1731   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1732   if (!rank) {
1733     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1734     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1735     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object");
1736   }
1737 
1738   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1739   M = header[1]; N = header[2];
1740   /* determine ownership of all rows */
1741   m = M/size + ((M % size) > rank);
1742   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1743   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1744   rowners[0] = 0;
1745   for ( i=2; i<=size; i++ ) {
1746     rowners[i] += rowners[i-1];
1747   }
1748   rstart = rowners[rank];
1749   rend   = rowners[rank+1];
1750 
1751   /* distribute row lengths to all processors */
1752   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1753   offlens = ourlens + (rend-rstart);
1754   if (!rank) {
1755     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1756     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1757     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1758     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1759     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1760     PetscFree(sndcounts);
1761   }
1762   else {
1763     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1764   }
1765 
1766   if (!rank) {
1767     /* calculate the number of nonzeros on each processor */
1768     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1769     PetscMemzero(procsnz,size*sizeof(int));
1770     for ( i=0; i<size; i++ ) {
1771       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1772         procsnz[i] += rowlengths[j];
1773       }
1774     }
1775     PetscFree(rowlengths);
1776 
1777     /* determine max buffer needed and allocate it */
1778     maxnz = 0;
1779     for ( i=0; i<size; i++ ) {
1780       maxnz = PetscMax(maxnz,procsnz[i]);
1781     }
1782     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1783 
1784     /* read in my part of the matrix column indices  */
1785     nz = procsnz[0];
1786     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1787     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1788 
1789     /* read in everyone else's part and ship it off */
1790     for ( i=1; i<size; i++ ) {
1791       nz = procsnz[i];
1792       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1793       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1794     }
1795     PetscFree(cols);
1796   }
1797   else {
1798     /* determine buffer space needed for message */
1799     nz = 0;
1800     for ( i=0; i<m; i++ ) {
1801       nz += ourlens[i];
1802     }
1803     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1804 
1805     /* receive message of column indices */
1806     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1807     MPI_Get_count(&status,MPI_INT,&maxnz);
1808     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1809   }
1810 
1811   /* loop over local rows, determining number of off-diagonal entries */
1812   PetscMemzero(offlens,m*sizeof(int));
1813   jj = 0;
1814   for ( i=0; i<m; i++ ) {
1815     for ( j=0; j<ourlens[i]; j++ ) {
1816       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1817       jj++;
1818     }
1819   }
1820 
1821   /* create our matrix */
1822   for ( i=0; i<m; i++ ) {
1823     ourlens[i] -= offlens[i];
1824   }
1825   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1826   A = *newmat;
1827   MatSetOption(A,MAT_COLUMNS_SORTED);
1828   for ( i=0; i<m; i++ ) {
1829     ourlens[i] += offlens[i];
1830   }
1831 
1832   if (!rank) {
1833     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1834 
1835     /* read in my part of the matrix numerical values  */
1836     nz = procsnz[0];
1837     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1838 
1839     /* insert into matrix */
1840     jj      = rstart;
1841     smycols = mycols;
1842     svals   = vals;
1843     for ( i=0; i<m; i++ ) {
1844       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1845       smycols += ourlens[i];
1846       svals   += ourlens[i];
1847       jj++;
1848     }
1849 
1850     /* read in other processors' parts and ship them out */
1851     for ( i=1; i<size; i++ ) {
1852       nz = procsnz[i];
1853       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1854       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1855     }
1856     PetscFree(procsnz);
1857   }
1858   else {
1859     /* receive numeric values */
1860     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1861 
1862     /* receive message of values */
1863     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1864     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1865     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1866 
1867     /* insert into matrix */
1868     jj      = rstart;
1869     smycols = mycols;
1870     svals   = vals;
1871     for ( i=0; i<m; i++ ) {
1872       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1873       smycols += ourlens[i];
1874       svals   += ourlens[i];
1875       jj++;
1876     }
1877   }
1878   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1879 
1880   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1881   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1882   return 0;
1883 }
1884