xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 96854ed6ad2ded53123cd28a73515071866c9e4e)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.201 1997/04/10 00:02:50 bsmith Exp curfman $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the number of global columns in the matrix.
14 */
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
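/*
   Illustrative sketch (guarded out; the helper name is made up): once colmap
   has been built, a global column index is translated into a local column
   index of the off-diagonal block B exactly as MatSetValues_MPIAIJ() and
   MatAssemblyEnd_MPIAIJ() do below; a result of -1 means the column is not
   (yet) present in B on this processor.
*/
#if 0
static int ExampleColmapLookup_Private(Mat mat,int global_col,int *local_col)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (!aij->colmap) {
    ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
  }
  *local_col = aij->colmap[global_col] - 1;  /* colmap stores local index + 1 */
  return 0;
}
#endif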
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = aimax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66       for ( _i=0; _i<nrow; _i++ ) { \
67         if (rp[_i] > col1) break; \
68         if (rp[_i] == col1) { \
69           if (addv == ADD_VALUES) ap[_i] += value;   \
70           else                  ap[_i] = value; \
71           goto a_noinsert; \
72         } \
73       }  \
74       if (nonew)  goto a_noinsert; \
75       if (nrow >= rmax) { \
76         /* there is no extra room in row, therefore enlarge */ \
77         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
78         Scalar *new_a; \
79  \
80         /* malloc new storage space */ \
81         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
82         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
83         new_j   = (int *) (new_a + new_nz); \
84         new_i   = new_j + new_nz; \
85  \
86         /* copy over old data into new slots */ \
87         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
88         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
89         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
90         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
91         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
92                                                            len*sizeof(int)); \
93         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
94         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
95                                                            len*sizeof(Scalar));  \
96         /* free up old matrix storage */ \
97  \
98         PetscFree(a->a);  \
99         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
100         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
101         a->singlemalloc = 1; \
102  \
103         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
104         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
105         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
106         a->maxnz += CHUNKSIZE; \
107         a->reallocs++; \
108       } \
109       N = nrow++ - 1; a->nz++; \
110       /* shift up all the later entries in this row */ \
111       for ( ii=N; ii>=_i; ii-- ) { \
112         rp[ii+1] = rp[ii]; \
113         ap[ii+1] = ap[ii]; \
114       } \
115       rp[_i] = col1;  \
116       ap[_i] = value;  \
117       a_noinsert: ; \
118       ailen[row] = nrow; \
119 }
120 
121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
122 { \
123  \
124     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
125     rmax = bimax[row]; nrow = bilen[row];  \
126     col1 = col - shift; \
127      \
128       for ( _i=0; _i<nrow; _i++ ) { \
129         if (rp[_i] > col1) break; \
130         if (rp[_i] == col1) { \
131           if (addv == ADD_VALUES) ap[_i] += value;   \
132           else                  ap[_i] = value; \
133           goto b_noinsert; \
134         } \
135       }  \
136       if (nonew)  goto b_noinsert; \
137       if (nrow >= rmax) { \
138         /* there is no extra room in row, therefore enlarge */ \
139         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
140         Scalar *new_a; \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
144         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
145         new_j   = (int *) (new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
150         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(int)); \
155         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
156         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(Scalar));  \
158         /* free up old matrix storage */ \
159  \
160         PetscFree(b->a);  \
161         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
162         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
163         b->singlemalloc = 1; \
164  \
165         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
166         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
167         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
168         b->maxnz += CHUNKSIZE; \
169         b->reallocs++; \
170       } \
171       N = nrow++ - 1; b->nz++; \
172       /* shift up all the later entries in this row */ \
173       for ( ii=N; ii>=_i; ii-- ) { \
174         rp[ii+1] = rp[ii]; \
175         ap[ii+1] = ap[ii]; \
176       } \
177       rp[_i] = col1;  \
178       ap[_i] = value;  \
179       b_noinsert: ; \
180       bilen[row] = nrow; \
181 }
182 
183 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
184 #undef __FUNC__
185 #define __FUNC__ "MatSetValues_MPIAIJ"
186 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
189   Scalar     value;
190   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
191   int        cstart = aij->cstart, cend = aij->cend,row,col;
192   int        roworiented = aij->roworiented;
193 
194   /* Some Variables required in the macro */
195   Mat        A = aij->A;
196   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
197   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
198   Scalar     *aa = a->a;
199 
200   Mat        B = aij->B;
201   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
202   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
203   Scalar     *ba = b->a;
204 
205   int        *rp,ii,nrow,_i,rmax, N, col1;
206   int        nonew = a->nonew,shift = a->indexshift;
207   Scalar     *ap;
208 
209   for ( i=0; i<m; i++ ) {
210 #if defined(PETSC_BOPT_g)
211     if (im[i] < 0) SETERRQ(1,0,"Negative row");
212     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
213 #endif
214     if (im[i] >= rstart && im[i] < rend) {
215       row = im[i] - rstart;
216       for ( j=0; j<n; j++ ) {
217         if (in[j] >= cstart && in[j] < cend){
218           col = in[j] - cstart;
219           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
220           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
221           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
222         }
223 #if defined(PETSC_BOPT_g)
224         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
225         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
226 #endif
227         else {
228           if (mat->was_assembled) {
229             if (!aij->colmap) {
230               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
231             }
232             col = aij->colmap[in[j]] - 1;
233             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
234               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
235               col =  in[j];
236             }
237           }
238           else col = in[j];
239           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
240           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
241           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
242         }
243       }
244     }
245     else {
246       if (roworiented && !aij->donotstash) {
247         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
248       }
249       else {
250         if (!aij->donotstash) {
251           row = im[i];
252           for ( j=0; j<n; j++ ) {
253             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
254           }
255         }
256       }
257     }
258   }
259   return 0;
260 }
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatGetValues_MPIAIJ"
264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
265 {
266   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
267   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
268   int        cstart = aij->cstart, cend = aij->cend,row,col;
269 
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     }
294     else {
295       SETERRQ(1,0,"Only local values currently supported");
296     }
297   }
298   return 0;
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES) */
315   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
316   if (addv == (ADD_VALUES|INSERT_VALUES)) {
317     SETERRQ(1,0,"Some processors inserted others added");
318   }
319   mat->insertmode = addv; /* in case this processor had no cache */
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
325   for ( i=0; i<aij->stash.n; i++ ) {
326     idx = aij->stash.idx[i];
327     for ( j=0; j<size; j++ ) {
328       if (idx >= owners[j] && idx < owners[j+1]) {
329         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
330       }
331     }
332   }
333   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
334 
335   /* inform other processors of number of messages and max length */
336   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
337   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
338   nreceives = work[rank];
339   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
340   nmax = work[rank];
341   PetscFree(work);
342 
343   /* post receives:
344        1) each message will consist of ordered triples
345      (global row index, global column index, value); we store the indices
346      as Scalars (doubles) to simplify the message passing.
347        2) since we don't know how long each individual message is we
348      allocate the largest needed buffer for each receive. Potentially
349      this is a lot of wasted space.
350 
351 
352        This could be done better.
353   */
354   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
355   CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
357   CHKPTRQ(recv_waits);
358   for ( i=0; i<nreceives; i++ ) {
359     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
360               comm,recv_waits+i);
361   }
362 
363   /* do sends:
364       1) starts[i] gives the starting index in svalues for stuff going to
365          the ith processor
366   */
367   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
368   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
369   CHKPTRQ(send_waits);
370   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
371   starts[0] = 0;
372   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
373   for ( i=0; i<aij->stash.n; i++ ) {
374     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
375     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
376     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
377   }
378   PetscFree(owner);
379   starts[0] = 0;
380   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
381   count = 0;
382   for ( i=0; i<size; i++ ) {
383     if (procs[i]) {
384       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
385                 comm,send_waits+count++);
386     }
387   }
388   PetscFree(starts); PetscFree(nprocs);
389 
390   /* Free cache space */
391   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
392   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
393 
394   aij->svalues    = svalues;    aij->rvalues    = rvalues;
395   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
396   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
397   aij->rmax       = nmax;
398 
399   return 0;
400 }
401 extern int MatSetUpMultiply_MPIAIJ(Mat);
402 
403 #undef __FUNC__
404 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
405 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
406 {
407   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
408   MPI_Status  *send_status,recv_status;
409   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
410   int         row,col,other_disassembled;
411   Scalar      *values,val;
412   InsertMode  addv = mat->insertmode;
413 
414   /*  wait on receives */
415   while (count) {
416     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
417     /* unpack receives into our local space */
418     values = aij->rvalues + 3*imdex*aij->rmax;
419     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
420     n = n/3;
421     for ( i=0; i<n; i++ ) {
422       row = (int) PetscReal(values[3*i]) - aij->rstart;
423       col = (int) PetscReal(values[3*i+1]);
424       val = values[3*i+2];
425       if (col >= aij->cstart && col < aij->cend) {
426         col -= aij->cstart;
427         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
428       }
429       else {
430         if (mat->was_assembled || mat->assembled) {
431           if (!aij->colmap) {
432             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
433           }
434           col = aij->colmap[col] - 1;
435           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
436             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
437             col = (int) PetscReal(values[3*i+1]);
438           }
439         }
440         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
441       }
442     }
443     count--;
444   }
445   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
446 
447   /* wait on sends */
448   if (aij->nsends) {
449     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
450     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
451     PetscFree(send_status);
452   }
453   PetscFree(aij->send_waits); PetscFree(aij->svalues);
454 
455   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
456   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
457 
458   /* determine if any processor has disassembled; if so, we must
459      also disassemble ourselves, in order that we may reassemble. */
460   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
461   if (mat->was_assembled && !other_disassembled) {
462     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
463   }
464 
465   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
466     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
467   }
468   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
469   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
470 
471   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
472   return 0;
473 }
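/*
   Illustrative sketch (guarded out; the function name, sizes, and stencil are
   made up): typical user-level assembly of an MPIAIJ matrix.  Each processor
   sets values for the rows it owns; entries whose columns fall outside this
   processor's diagonal block go into the off-diagonal block B, while entries
   for rows owned by another processor would be stashed by MatSetValues_MPIAIJ()
   and communicated by the MatAssemblyBegin()/MatAssemblyEnd() pair above.
*/
#if 0
static int ExampleAssembleMPIAIJ(MPI_Comm comm,Mat *newmat)
{
  Mat    A;
  int    ierr,i,rstart,rend,cols[3];
  Scalar vals[3];

  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,
                         3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    /* simple tridiagonal stencil; the first and last rows get fewer entries */
    cols[0] = i-1; cols[1] = i; cols[2] = i+1;
    vals[0] = -1.0; vals[1] = 2.0; vals[2] = -1.0;
    if (i == 0) {
      ierr = MatSetValues(A,1,&i,2,cols+1,vals+1,INSERT_VALUES); CHKERRQ(ierr);
    } else if (i == 99) {
      ierr = MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
    } else {
      ierr = MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = A;
  return 0;
}
#endif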
474 
475 #undef __FUNC__
476 #define __FUNC__ "MatZeroEntries_MPIAIJ"
477 int MatZeroEntries_MPIAIJ(Mat A)
478 {
479   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
480   int        ierr;
481   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
482   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
483   return 0;
484 }
485 
486 /* the code does not do the diagonal entries correctly unless the
487    matrix is square and the column and row ownerships are identical.
488    This is a BUG. The only way to fix it seems to be to access
489    aij->A and aij->B directly and not through the MatZeroRows()
490    routine.
491 */
492 #undef __FUNC__
493 #define __FUNC__ "MatZeroRows_MPIAIJ"
494 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
495 {
496   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
497   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
498   int            *procs,*nprocs,j,found,idx,nsends,*work;
499   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
500   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
501   int            *lens,imdex,*lrows,*values;
502   MPI_Comm       comm = A->comm;
503   MPI_Request    *send_waits,*recv_waits;
504   MPI_Status     recv_status,*send_status;
505   IS             istmp;
506 
507   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
508   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
509 
510   /*  first count number of contributors to each processor */
511   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
512   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
513   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
514   for ( i=0; i<N; i++ ) {
515     idx = rows[i];
516     found = 0;
517     for ( j=0; j<size; j++ ) {
518       if (idx >= owners[j] && idx < owners[j+1]) {
519         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
520       }
521     }
522     if (!found) SETERRQ(1,0,"Index out of range");
523   }
524   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
525 
526   /* inform other processors of number of messages and max length */
527   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
528   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
529   nrecvs = work[rank];
530   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
531   nmax = work[rank];
532   PetscFree(work);
533 
534   /* post receives:   */
535   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
536   CHKPTRQ(rvalues);
537   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
538   CHKPTRQ(recv_waits);
539   for ( i=0; i<nrecvs; i++ ) {
540     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
541   }
542 
543   /* do sends:
544       1) starts[i] gives the starting index in svalues for stuff going to
545          the ith processor
546   */
547   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
548   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
549   CHKPTRQ(send_waits);
550   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
551   starts[0] = 0;
552   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
553   for ( i=0; i<N; i++ ) {
554     svalues[starts[owner[i]]++] = rows[i];
555   }
556   ISRestoreIndices(is,&rows);
557 
558   starts[0] = 0;
559   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
560   count = 0;
561   for ( i=0; i<size; i++ ) {
562     if (procs[i]) {
563       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
564     }
565   }
566   PetscFree(starts);
567 
568   base = owners[rank];
569 
570   /*  wait on receives */
571   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
572   source = lens + nrecvs;
573   count  = nrecvs; slen = 0;
574   while (count) {
575     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
576     /* unpack receives into our local space */
577     MPI_Get_count(&recv_status,MPI_INT,&n);
578     source[imdex]  = recv_status.MPI_SOURCE;
579     lens[imdex]  = n;
580     slen += n;
581     count--;
582   }
583   PetscFree(recv_waits);
584 
585   /* move the data into the send scatter */
586   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
587   count = 0;
588   for ( i=0; i<nrecvs; i++ ) {
589     values = rvalues + i*nmax;
590     for ( j=0; j<lens[i]; j++ ) {
591       lrows[count++] = values[j] - base;
592     }
593   }
594   PetscFree(rvalues); PetscFree(lens);
595   PetscFree(owner); PetscFree(nprocs);
596 
597   /* actually zap the local rows */
598   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
599   PLogObjectParent(A,istmp);
600   PetscFree(lrows);
601   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
602   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
603   ierr = ISDestroy(istmp); CHKERRQ(ierr);
604 
605   /* wait on sends */
606   if (nsends) {
607     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
608     CHKPTRQ(send_status);
609     MPI_Waitall(nsends,send_waits,send_status);
610     PetscFree(send_status);
611   }
612   PetscFree(send_waits); PetscFree(svalues);
613 
614   return 0;
615 }
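/*
   Illustrative sketch (guarded out; the function name and row numbers are made
   up): zeroing a set of global rows of an assembled MPIAIJ matrix.  Each
   processor may list any global rows; MatZeroRows_MPIAIJ() above routes each
   row to its owner.  Passing a pointer to a Scalar puts that value on the
   diagonal of the zeroed rows (subject to the square-matrix caveat noted
   above); passing 0 leaves the diagonal zero as well.
*/
#if 0
static int ExampleZeroRows(Mat A)
{
  IS     rows;
  int    ierr,idx[2];
  Scalar one = 1.0;

  idx[0] = 0; idx[1] = 5;                       /* global row numbers */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,idx,&rows); CHKERRQ(ierr);
  ierr = MatZeroRows(A,rows,&one); CHKERRQ(ierr);
  ierr = ISDestroy(rows); CHKERRQ(ierr);
  return 0;
}
#endif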
616 
617 #undef __FUNC__
618 #define __FUNC__ "MatMult_MPIAIJ"
619 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
620 {
621   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
622   int        ierr,nt;
623 
624   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
625   if (nt != a->n) {
626     SETERRQ(1,0,"Incompatible partition of A and xx");
627   }
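  /*
     gather the ghost values of xx needed by the off-diagonal block B into
     a->lvec, overlapping that communication with the diagonal-block multiply
  */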
628   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
629   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
630   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
631   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
632   return 0;
633 }
634 
635 #undef __FUNC__
636 #define __FUNC__ "MatMultAdd_MPIAIJ"
637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
638 {
639   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
640   int        ierr;
641   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
642   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
643   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
644   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
645   return 0;
646 }
647 
648 #undef __FUNC__
649 #define __FUNC__ "MatMultTrans_MPIAIJ"
650 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
651 {
652   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
653   int        ierr;
654 
655   /* do nondiagonal part */
656   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
657   /* send it on its way */
658   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
659   /* do local part */
660   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
661   /* receive remote parts: note this assumes the values are not actually */
662   /* inserted in yy until the next line, which is true for my implementation*/
663   /* but may not always be true. */
664   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
665   return 0;
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
673   int        ierr;
674 
675   /* do nondiagonal part */
676   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
677   /* send it on its way */
678   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
679   /* do local part */
680   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
681   /* receive remote parts: note this assumes the values are not actually */
682   /* inserted in zz until the next line, which is true for my implementation*/
683   /* but may not always be true. */
684   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
685   return 0;
686 }
687 
688 /*
689   This only works correctly for square matrices where the subblock A->A is the
690    diagonal block
691 */
692 #undef __FUNC__
693 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
694 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
695 {
696   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
697   if (a->M != a->N)
698     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
699   if (a->rstart != a->cstart || a->rend != a->cend) {
700     SETERRQ(1,0,"row partition must equal col partition");  }
701   return MatGetDiagonal(a->A,v);
702 }
703 
704 #undef __FUNC__
705 #define __FUNC__ "MatScale_MPIAIJ"
706 int MatScale_MPIAIJ(Scalar *aa,Mat A)
707 {
708   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
709   int        ierr;
710   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
711   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
712   return 0;
713 }
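/*
   Illustrative sketch (guarded out; the function name is made up): MatScale()
   with this (1997-era) calling sequence takes a pointer to the scalar factor
   followed by the matrix, as in the calls to the sequential blocks above.
*/
#if 0
static int ExampleScale(Mat A)
{
  Scalar alpha = 2.0;
  int    ierr;

  ierr = MatScale(&alpha,A); CHKERRQ(ierr);    /* A <- 2.0 * A */
  return 0;
}
#endif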
714 
715 #undef __FUNC__
716 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
717 int MatDestroy_MPIAIJ(PetscObject obj)
718 {
719   Mat        mat = (Mat) obj;
720   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
721   int        ierr;
722 #if defined(PETSC_LOG)
723   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
724 #endif
725   PetscFree(aij->rowners);
726   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
727   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
728   if (aij->colmap) PetscFree(aij->colmap);
729   if (aij->garray) PetscFree(aij->garray);
730   if (aij->lvec)   VecDestroy(aij->lvec);
731   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
732   if (aij->rowvalues) PetscFree(aij->rowvalues);
733   PetscFree(aij);
734   if (mat->mapping) {
735     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
736   }
737   PLogObjectDestroy(mat);
738   PetscHeaderDestroy(mat);
739   return 0;
740 }
741 
742 #undef __FUNC__
743 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
744 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
745 {
746   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
747   int         ierr;
748 
749   if (aij->size == 1) {
750     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
751   }
752   else SETERRQ(1,0,"Only uniprocessor output supported");
753   return 0;
754 }
755 
756 #undef __FUNC__
757 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
758 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
759 {
760   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
761   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
762   int         ierr, format,shift = C->indexshift,rank;
763   FILE        *fd;
764   ViewerType  vtype;
765 
766   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
767   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
768     ierr = ViewerGetFormat(viewer,&format);
769     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
770       MatInfo info;
771       int     flg;
772       MPI_Comm_rank(mat->comm,&rank);
773       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
774       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
775       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
776       PetscSequentialPhaseBegin(mat->comm,1);
777       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
778          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
779       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
780          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
781       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
782       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
783       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
784       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
785       fflush(fd);
786       PetscSequentialPhaseEnd(mat->comm,1);
787       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
788       return 0;
789     }
790     else if (format == VIEWER_FORMAT_ASCII_INFO) {
791       return 0;
792     }
793   }
794 
795   if (vtype == DRAW_VIEWER) {
796     Draw       draw;
797     PetscTruth isnull;
798     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
799     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
800   }
801 
802   if (vtype == ASCII_FILE_VIEWER) {
803     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
804     PetscSequentialPhaseBegin(mat->comm,1);
805     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
806            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
807            aij->cend);
808     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
809     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
810     fflush(fd);
811     PetscSequentialPhaseEnd(mat->comm,1);
812   }
813   else {
814     int size = aij->size;
815     rank = aij->rank;
816     if (size == 1) {
817       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
818     }
819     else {
820       /* assemble the entire matrix onto first processor. */
821       Mat         A;
822       Mat_SeqAIJ *Aloc;
823       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
824       Scalar      *a;
825 
826       if (!rank) {
827         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
828                CHKERRQ(ierr);
829       }
830       else {
831         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
832                CHKERRQ(ierr);
833       }
834       PLogObjectParent(mat,A);
835 
836       /* copy over the A part */
837       Aloc = (Mat_SeqAIJ*) aij->A->data;
838       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
839       row = aij->rstart;
840       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
841       for ( i=0; i<m; i++ ) {
842         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
843         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
844       }
845       aj = Aloc->j;
846       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
847 
848       /* copy over the B part */
849       Aloc = (Mat_SeqAIJ*) aij->B->data;
850       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
851       row = aij->rstart;
852       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
853       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
854       for ( i=0; i<m; i++ ) {
855         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
856         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
857       }
858       PetscFree(ct);
859       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
860       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
861       if (!rank) {
862         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
863       }
864       ierr = MatDestroy(A); CHKERRQ(ierr);
865     }
866   }
867   return 0;
868 }
869 
870 #undef __FUNC__
871 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
872 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
873 {
874   Mat         mat = (Mat) obj;
875   int         ierr;
876   ViewerType  vtype;
877 
878   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
879   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
880       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
881     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
882   }
883   else if (vtype == BINARY_FILE_VIEWER) {
884     return MatView_MPIAIJ_Binary(mat,viewer);
885   }
886   return 0;
887 }
888 
889 /*
890     This has to provide several versions.
891 
892      2) a) use only local smoothing, updating outer values only once.
893         b) local smoothing, updating outer values each inner iteration.
894      3) coloring, updating outer values between colors.
895 */
896 #undef __FUNC__
897 #define __FUNC__ "MatRelax_MPIAIJ"
898 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
899                            double fshift,int its,Vec xx)
900 {
901   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
902   Mat        AA = mat->A, BB = mat->B;
903   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
904   Scalar     *b,*x,*xs,*ls,d,*v,sum;
905   int        ierr,*idx, *diag;
906   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
907 
908   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
909   xs = x + shift; /* shift by one for index start of 1 */
910   ls = ls + shift;
911   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
912   diag = A->diag;
913   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
914     if (flag & SOR_ZERO_INITIAL_GUESS) {
915       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
916     }
917     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
918     CHKERRQ(ierr);
919     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
920     CHKERRQ(ierr);
921     while (its--) {
922       /* go down through the rows */
923       for ( i=0; i<m; i++ ) {
924         n    = A->i[i+1] - A->i[i];
925         idx  = A->j + A->i[i] + shift;
926         v    = A->a + A->i[i] + shift;
927         sum  = b[i];
928         SPARSEDENSEMDOT(sum,xs,v,idx,n);
929         d    = fshift + A->a[diag[i]+shift];
930         n    = B->i[i+1] - B->i[i];
931         idx  = B->j + B->i[i] + shift;
932         v    = B->a + B->i[i] + shift;
933         SPARSEDENSEMDOT(sum,ls,v,idx,n);
934         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
935       }
936       /* come up through the rows */
937       for ( i=m-1; i>-1; i-- ) {
938         n    = A->i[i+1] - A->i[i];
939         idx  = A->j + A->i[i] + shift;
940         v    = A->a + A->i[i] + shift;
941         sum  = b[i];
942         SPARSEDENSEMDOT(sum,xs,v,idx,n);
943         d    = fshift + A->a[diag[i]+shift];
944         n    = B->i[i+1] - B->i[i];
945         idx  = B->j + B->i[i] + shift;
946         v    = B->a + B->i[i] + shift;
947         SPARSEDENSEMDOT(sum,ls,v,idx,n);
948         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
949       }
950     }
951   }
952   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
953     if (flag & SOR_ZERO_INITIAL_GUESS) {
954       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
955     }
956     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
957     CHKERRQ(ierr);
958     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
959     CHKERRQ(ierr);
960     while (its--) {
961       for ( i=0; i<m; i++ ) {
962         n    = A->i[i+1] - A->i[i];
963         idx  = A->j + A->i[i] + shift;
964         v    = A->a + A->i[i] + shift;
965         sum  = b[i];
966         SPARSEDENSEMDOT(sum,xs,v,idx,n);
967         d    = fshift + A->a[diag[i]+shift];
968         n    = B->i[i+1] - B->i[i];
969         idx  = B->j + B->i[i] + shift;
970         v    = B->a + B->i[i] + shift;
971         SPARSEDENSEMDOT(sum,ls,v,idx,n);
972         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
973       }
974     }
975   }
976   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
977     if (flag & SOR_ZERO_INITIAL_GUESS) {
978       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
979     }
980     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
981                             mat->Mvctx); CHKERRQ(ierr);
982     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
983                             mat->Mvctx); CHKERRQ(ierr);
984     while (its--) {
985       for ( i=m-1; i>-1; i-- ) {
986         n    = A->i[i+1] - A->i[i];
987         idx  = A->j + A->i[i] + shift;
988         v    = A->a + A->i[i] + shift;
989         sum  = b[i];
990         SPARSEDENSEMDOT(sum,xs,v,idx,n);
991         d    = fshift + A->a[diag[i]+shift];
992         n    = B->i[i+1] - B->i[i];
993         idx  = B->j + B->i[i] + shift;
994         v    = B->a + B->i[i] + shift;
995         SPARSEDENSEMDOT(sum,ls,v,idx,n);
996         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
997       }
998     }
999   }
1000   else {
1001     SETERRQ(1,0,"Option not supported");
1002   }
1003   return 0;
1004 }
1005 
1006 #undef __FUNC__
1007 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1008 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1009 {
1010   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1011   Mat        A = mat->A, B = mat->B;
1012   int        ierr;
1013   double     isend[5], irecv[5];
1014 
1015   info->rows_global    = (double)mat->M;
1016   info->columns_global = (double)mat->N;
1017   info->rows_local     = (double)mat->m;
1018   info->columns_local  = (double)mat->N;
1019   info->block_size     = 1.0;
1020   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1021   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1022   isend[3] = info->memory;  isend[4] = info->mallocs;
1023   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1024   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1025   isend[3] += info->memory;  isend[4] += info->mallocs;
1026   if (flag == MAT_LOCAL) {
1027     info->nz_used      = isend[0];
1028     info->nz_allocated = isend[1];
1029     info->nz_unneeded  = isend[2];
1030     info->memory       = isend[3];
1031     info->mallocs      = isend[4];
1032   } else if (flag == MAT_GLOBAL_MAX) {
1033     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1034     info->nz_used      = irecv[0];
1035     info->nz_allocated = irecv[1];
1036     info->nz_unneeded  = irecv[2];
1037     info->memory       = irecv[3];
1038     info->mallocs      = irecv[4];
1039   } else if (flag == MAT_GLOBAL_SUM) {
1040     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1041     info->nz_used      = irecv[0];
1042     info->nz_allocated = irecv[1];
1043     info->nz_unneeded  = irecv[2];
1044     info->memory       = irecv[3];
1045     info->mallocs      = irecv[4];
1046   }
1047   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1048   info->fill_ratio_needed = 0;
1049   info->factor_mallocs    = 0;
1050 
1051   return 0;
1052 }
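/*
   Illustrative sketch (guarded out; the function name is made up): querying
   assembly statistics for an MPIAIJ matrix.  MAT_LOCAL reports this
   processor's part only, while MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce the
   five fields handled above across the matrix's communicator.
*/
#if 0
static int ExampleGetInfo(Mat A)
{
  MatInfo info;
  int     ierr;

  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
  printf("total nonzeros used %d, allocated %d\n",
         (int)info.nz_used,(int)info.nz_allocated);
  return 0;
}
#endif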
1053 
1054 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1055 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1056 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1057 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1058 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1059 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1060 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1061 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1062 
1063 #undef __FUNC__
1064 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1065 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1066 {
1067   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1068 
1069   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1070       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1071       op == MAT_COLUMNS_UNSORTED ||
1072       op == MAT_COLUMNS_SORTED ||
1073       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1074       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1075         MatSetOption(a->A,op);
1076         MatSetOption(a->B,op);
1077   } else if (op == MAT_ROW_ORIENTED) {
1078     a->roworiented = 1;
1079     MatSetOption(a->A,op);
1080     MatSetOption(a->B,op);
1081   } else if (op == MAT_ROWS_SORTED ||
1082              op == MAT_ROWS_UNSORTED ||
1083              op == MAT_SYMMETRIC ||
1084              op == MAT_STRUCTURALLY_SYMMETRIC ||
1085              op == MAT_YES_NEW_DIAGONALS)
1086     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1087   else if (op == MAT_COLUMN_ORIENTED) {
1088     a->roworiented = 0;
1089     MatSetOption(a->A,op);
1090     MatSetOption(a->B,op);
1091   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1092     a->donotstash = 1;
1093   } else if (op == MAT_NO_NEW_DIAGONALS)
1094     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1095   else
1096     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1097   return 0;
1098 }
1099 
1100 #undef __FUNC__
1101 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1103 {
1104   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1105   *m = mat->M; *n = mat->N;
1106   return 0;
1107 }
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1111 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1112 {
1113   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1114   *m = mat->m; *n = mat->N;
1115   return 0;
1116 }
1117 
1118 #undef __FUNC__
1119 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1120 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1121 {
1122   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1123   *m = mat->rstart; *n = mat->rend;
1124   return 0;
1125 }
1126 
1127 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1128 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1129 
1130 #undef __FUNC__
1131 #define __FUNC__ "MatGetRow_MPIAIJ"
1132 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1133 {
1134   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1135   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1136   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1137   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1138   int        *cmap, *idx_p;
1139 
1140   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1141   mat->getrowactive = PETSC_TRUE;
1142 
1143   if (!mat->rowvalues && (idx || v)) {
1144     /*
1145         allocate enough space to hold information from the longest row.
1146     */
1147     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1148     int     max = 1,m = mat->m,tmp;
1149     for ( i=0; i<m; i++ ) {
1150       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1151       if (max < tmp) { max = tmp; }
1152     }
1153     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1154     CHKPTRQ(mat->rowvalues);
1155     mat->rowindices = (int *) (mat->rowvalues + max);
1156   }
1157 
1158   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1159   lrow = row - rstart;
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1165   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for ( i=0; i<nzB; i++ ) {
1176           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1181         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for ( i=0; i<imark; i++ ) {
1187             idx_p[i] = cmap[cworkB[i]];
1188           }
1189         } else {
1190           for ( i=0; i<nzB; i++ ) {
1191             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1192             else break;
1193           }
1194           imark = i;
1195         }
1196         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1197         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1198       }
1199     }
1200     else {
1201       if (idx) *idx = 0;
1202       if (v)   *v   = 0;
1203     }
1204   }
1205   *nz = nztot;
1206   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1207   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1208   return 0;
1209 }
1210 
1211 #undef __FUNC__
1212 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1213 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1214 {
1215   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1216   if (aij->getrowactive == PETSC_FALSE) {
1217     SETERRQ(1,0,"MatGetRow not called");
1218   }
1219   aij->getrowactive = PETSC_FALSE;
1220   return 0;
1221 }
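/*
   Illustrative sketch (guarded out; the function name is made up): examining
   the nonzeros of the locally owned rows with MatGetRow()/MatRestoreRow().
   As implemented above, only rows in [rstart,rend) may be requested, and only
   one row may be active at a time.
*/
#if 0
static int ExampleGetRows(Mat A)
{
  int    ierr,i,rstart,rend,nz,*cols;
  Scalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    ierr = MatGetRow(A,i,&nz,&cols,&vals); CHKERRQ(ierr);
    /* ... examine the nz (global column, value) pairs of row i ... */
    ierr = MatRestoreRow(A,i,&nz,&cols,&vals); CHKERRQ(ierr);
  }
  return 0;
}
#endif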
1222 
1223 #undef __FUNC__
1224 #define __FUNC__ "MatNorm_MPIAIJ"
1225 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1226 {
1227   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1228   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1229   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1230   double     sum = 0.0;
1231   Scalar     *v;
1232 
1233   if (aij->size == 1) {
1234     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1235   } else {
1236     if (type == NORM_FROBENIUS) {
1237       v = amat->a;
1238       for (i=0; i<amat->nz; i++ ) {
1239 #if defined(PETSC_COMPLEX)
1240         sum += real(conj(*v)*(*v)); v++;
1241 #else
1242         sum += (*v)*(*v); v++;
1243 #endif
1244       }
1245       v = bmat->a;
1246       for (i=0; i<bmat->nz; i++ ) {
1247 #if defined(PETSC_COMPLEX)
1248         sum += real(conj(*v)*(*v)); v++;
1249 #else
1250         sum += (*v)*(*v); v++;
1251 #endif
1252       }
1253       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1254       *norm = sqrt(*norm);
1255     }
1256     else if (type == NORM_1) { /* max column norm */
1257       double *tmp, *tmp2;
1258       int    *jj, *garray = aij->garray;
1259       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1260       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1261       PetscMemzero(tmp,aij->N*sizeof(double));
1262       *norm = 0.0;
1263       v = amat->a; jj = amat->j;
1264       for ( j=0; j<amat->nz; j++ ) {
1265         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1266       }
1267       v = bmat->a; jj = bmat->j;
1268       for ( j=0; j<bmat->nz; j++ ) {
1269         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1270       }
1271       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1272       for ( j=0; j<aij->N; j++ ) {
1273         if (tmp2[j] > *norm) *norm = tmp2[j];
1274       }
1275       PetscFree(tmp); PetscFree(tmp2);
1276     }
1277     else if (type == NORM_INFINITY) { /* max row norm */
1278       double ntemp = 0.0;
1279       for ( j=0; j<amat->m; j++ ) {
1280         v = amat->a + amat->i[j] + shift;
1281         sum = 0.0;
1282         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1283           sum += PetscAbsScalar(*v); v++;
1284         }
1285         v = bmat->a + bmat->i[j] + shift;
1286         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1287           sum += PetscAbsScalar(*v); v++;
1288         }
1289         if (sum > ntemp) ntemp = sum;
1290       }
1291       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1292     }
1293     else {
1294       SETERRQ(1,0,"No support for two norm");
1295     }
1296   }
1297   return 0;
1298 }
1299 
1300 #undef __FUNC__
1301 #define __FUNC__ "MatTranspose_MPIAIJ"
1302 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1303 {
1304   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1305   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1306   int        ierr,shift = Aloc->indexshift;
1307   Mat        B;
1308   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1309   Scalar     *array;
1310 
1311   if (matout == PETSC_NULL && M != N)
1312     SETERRQ(1,0,"Square matrix only for in-place");
1313   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1314          PETSC_NULL,&B); CHKERRQ(ierr);
1315 
1316   /* copy over the A part */
1317   Aloc = (Mat_SeqAIJ*) a->A->data;
1318   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1319   row = a->rstart;
1320   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1321   for ( i=0; i<m; i++ ) {
1322     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1323     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1324   }
1325   aj = Aloc->j;
1326   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1327 
1328   /* copy over the B part */
1329   Aloc = (Mat_SeqAIJ*) a->B->data;
1330   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1331   row = a->rstart;
1332   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1333   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1334   for ( i=0; i<m; i++ ) {
1335     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1336     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1337   }
1338   PetscFree(ct);
1339   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1340   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1341   if (matout != PETSC_NULL) {
1342     *matout = B;
1343   } else {
1344     /* This isn't really an in-place transpose, but we free the data structures of a */
1345     PetscFree(a->rowners);
1346     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1347     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1348     if (a->colmap) PetscFree(a->colmap);
1349     if (a->garray) PetscFree(a->garray);
1350     if (a->lvec) VecDestroy(a->lvec);
1351     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1352     PetscFree(a);
1353     PetscMemcpy(A,B,sizeof(struct _Mat));
1354     PetscHeaderDestroy(B);
1355   }
1356   return 0;
1357 }
1358 
1359 #undef __FUNC__
1360 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1361 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1362 {
1363   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1364   Mat a = aij->A, b = aij->B;
1365   int ierr,s1,s2,s3;
1366 
1367   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1368   if (rr) {
1369     s3 = aij->n;
1370     VecGetLocalSize_Fast(rr,s1);
1371     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1372     /* Overlap communication with computation. */
1373     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1374   }
1375   if (ll) {
1376     VecGetLocalSize_Fast(ll,s1);
1377     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1378     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1379   }
1380   /* scale  the diagonal block */
1381   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1382 
1383   if (rr) {
1384     /* Do a scatter end and then right scale the off-diagonal block */
1385     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1386     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1387   }
1388 
1389   return 0;
1390 }
1391 
1392 
1393 extern int MatPrintHelp_SeqAIJ(Mat);
1394 #undef __FUNC__
1395 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1396 int MatPrintHelp_MPIAIJ(Mat A)
1397 {
1398   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1399 
1400   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1401   else return 0;
1402 }
1403 
1404 #undef __FUNC__
1405 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1406 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1407 {
1408   *bs = 1;
1409   return 0;
1410 }
1411 #undef __FUNC__
1412 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1413 int MatSetUnfactored_MPIAIJ(Mat A)
1414 {
1415   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1416   int        ierr;
1417   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1418   return 0;
1419 }
1420 
1421 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1422 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1423 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1424 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1425 /* -------------------------------------------------------------------*/
1426 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1427        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1428        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1429        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1430        0,0,
1431        0,0,
1432        0,0,
1433        MatRelax_MPIAIJ,
1434        MatTranspose_MPIAIJ,
1435        MatGetInfo_MPIAIJ,0,
1436        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1437        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1438        0,
1439        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1440        0,0,0,0,
1441        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1442        0,0,
1443        0,0,MatConvertSameType_MPIAIJ,0,0,
1444        0,0,0,
1445        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1446        MatPrintHelp_MPIAIJ,
1447        MatScale_MPIAIJ,0,0,0,
1448        MatGetBlockSize_MPIAIJ,0,0,0,0,
1449        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1450 
1451 
1452 #undef __FUNC__
1453 #define __FUNC__ "MatCreateMPIAIJ"
1454 /*@C
1455    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1456    (the default parallel PETSc format).  For good matrix assembly performance
1457    the user should preallocate the matrix storage by setting the parameters
1458    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1459    performance can be increased by more than a factor of 50.
1460 
1461    Input Parameters:
1462 .  comm - MPI communicator
1463 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1464            This value should be the same as the local size used in creating the
1465            y vector for the matrix-vector product y = Ax.
1466 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1467        This value should be the same as the local size used in creating the
1468        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1469 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1470 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1471 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1472            (same for all local rows)
1473 .  d_nnz - array containing the number of nonzeros in the various rows of the
1474            diagonal portion of the local submatrix (possibly different for each row)
1475            or PETSC_NULL. You must leave room for the diagonal entry even if
1476            it is zero.
1477 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1478            submatrix (same for all local rows).
1479 .  o_nnz - array containing the number of nonzeros in the various rows of the
1480            off-diagonal portion of the local submatrix (possibly different for
1481            each row) or PETSC_NULL.
1482 
1483    Output Parameter:
1484 .  A - the matrix
1485 
1486    Notes:
1487    The AIJ format (also called the Yale sparse matrix format or
1488    compressed row storage) is fully compatible with standard Fortran 77
1489    storage.  That is, the stored row and column indices can begin at
1490    either one (as in Fortran) or zero.  See the users manual for details.
1491 
1492    The user MUST specify either the local or global matrix dimensions
1493    (possibly both).
1494 
1495    By default, this format uses inodes (identical nodes) when possible.
1496    We search for consecutive rows with the same nonzero structure, thereby
1497    reusing matrix information to achieve increased efficiency.
1498 
1499    Options Database Keys:
1500 $    -mat_aij_no_inode  - Do not use inodes
1501 $    -mat_aij_inode_limit <limit> - Set inode limit.
1502 $        (max limit=5)
1503 $    -mat_aij_oneindex - Internally use indexing starting at 1
1504 $        rather than 0.  Note: When calling MatSetValues(),
1505 $        the user still MUST index entries starting at 0!
1506 
1507    Storage Information:
1508    For a square global matrix we define each processor's diagonal portion
1509    to be its local rows and the corresponding columns (a square submatrix);
1510    each processor's off-diagonal portion encompasses the remainder of the
1511    local matrix (a rectangular submatrix).
1512 
1513    The user can specify preallocated storage for the diagonal part of
1514    the local submatrix with either d_nz or d_nnz (not both).  Set
1515    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1516    memory allocation.  Likewise, specify preallocated storage for the
1517    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1518 
1519    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1520    the figure below we depict these three local rows and all columns (0-11).
1521 
1522 $          0 1 2 3 4 5 6 7 8 9 10 11
1523 $         --------------------------
1524 $  row 3  |  o o o d d d o o o o o o
1525 $  row 4  |  o o o d d d o o o o o o
1526 $  row 5  |  o o o d d d o o o o o o
1527 $         --------------------------
1528 $
1529 
1530    Thus, any entries in the d locations are stored in the d (diagonal)
1531    submatrix, and any entries in the o locations are stored in the
1532    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1533    stored simply in the MATSEQAIJ format for compressed row storage.
1534 
1535    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1536    and o_nz should indicate the number of nonzeros per row in the o matrix.
1537    In general, for PDE problems in which most nonzeros are near the diagonal,
1538    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1539    or you will get TERRIBLE performance; see the users' manual chapter on
1540    matrices.
1541 
1542 .keywords: matrix, aij, compressed row, sparse, parallel
1543 
1544 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1545 @*/
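/*
   Illustrative usage sketch (not part of this file): the global size 100 and
   the preallocation counts 5 and 2 below are assumptions chosen only for the
   example.

      Mat    A;
      int    ierr, i, rstart, rend;
      Scalar one = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/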
1546 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1547                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1548 {
1549   Mat          B;
1550   Mat_MPIAIJ   *b;
1551   int          ierr, i,sum[2],work[2],size;
1552 
1553   *A = 0;
1554   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1555   PLogObjectCreate(B);
1556   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1557   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1558   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1559   MPI_Comm_size(comm,&size);
1560   if (size == 1) {
1561     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1562     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1563     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1564     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1565     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1566     B->ops.solve             = MatSolve_MPIAIJ;
1567     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1568     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1569     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1570     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1571   }
1572   B->destroy    = MatDestroy_MPIAIJ;
1573   B->view       = MatView_MPIAIJ;
1574   B->factor     = 0;
1575   B->assembled  = PETSC_FALSE;
1576   B->mapping    = 0;
1577 
1578   B->insertmode = NOT_SET_VALUES;
1579   MPI_Comm_rank(comm,&b->rank);
1580   MPI_Comm_size(comm,&b->size);
1581 
1582   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1583     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1584 
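  /* if the global sizes were not given, sum the local sizes over all processes */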
1585   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1586     work[0] = m; work[1] = n;
1587     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1588     if (M == PETSC_DECIDE) M = sum[0];
1589     if (N == PETSC_DECIDE) N = sum[1];
1590   }
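  /* split the rows (and columns) as evenly as possible: every process gets
     M/size of them and the first M%size processes get one extra */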
1591   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1592   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1593   b->m = m; B->m = m;
1594   b->n = n; B->n = n;
1595   b->N = N; B->N = N;
1596   b->M = M; B->M = M;
1597 
1598   /* build local table of row and column ownerships */
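  /* After the Allgather and the running sums below, rowners[i] is the first
     global row owned by process i (so rowners[size] == M); cowners is built
     the same way for the columns. */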
1599   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1600   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1601   b->cowners = b->rowners + b->size + 2;
1602   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1603   b->rowners[0] = 0;
1604   for ( i=2; i<=b->size; i++ ) {
1605     b->rowners[i] += b->rowners[i-1];
1606   }
1607   b->rstart = b->rowners[b->rank];
1608   b->rend   = b->rowners[b->rank+1];
1609   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1610   b->cowners[0] = 0;
1611   for ( i=2; i<=b->size; i++ ) {
1612     b->cowners[i] += b->cowners[i-1];
1613   }
1614   b->cstart = b->cowners[b->rank];
1615   b->cend   = b->cowners[b->rank+1];
1616 
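  /* b->A holds the diagonal block (m by n); b->B holds the off-diagonal block
     and is created here with the full global column width N (its columns are
     compacted to just those actually referenced when the matrix is assembled) */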
1617   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1618   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1619   PLogObjectParent(B,b->A);
1620   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1621   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1622   PLogObjectParent(B,b->B);
1623 
1624   /* build cache for off array entries formed */
1625   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1626   b->donotstash  = 0;
1627   b->colmap      = 0;
1628   b->garray      = 0;
1629   b->roworiented = 1;
1630 
1631   /* stuff used for matrix-vector multiply */
1632   b->lvec      = 0;
1633   b->Mvctx     = 0;
1634 
1635   /* stuff for MatGetRow() */
1636   b->rowindices   = 0;
1637   b->rowvalues    = 0;
1638   b->getrowactive = PETSC_FALSE;
1639 
1640   *A = B;
1641   return 0;
1642 }
1643 
1644 #undef __FUNC__
1645 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1646 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1647 {
1648   Mat        mat;
1649   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1650   int        ierr, len=0, flg;
1651 
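  /*
     Build a new MPIAIJ matrix with the same parallel layout as matin, then
     copy the ownership tables, column map, garray, scatter context, and the
     two sequential blocks.
  */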
1652   *newmat       = 0;
1653   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1654   PLogObjectCreate(mat);
1655   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1656   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1657   mat->destroy    = MatDestroy_MPIAIJ;
1658   mat->view       = MatView_MPIAIJ;
1659   mat->factor     = matin->factor;
1660   mat->assembled  = PETSC_TRUE;
1661 
1662   a->m = mat->m   = oldmat->m;
1663   a->n = mat->n   = oldmat->n;
1664   a->M = mat->M   = oldmat->M;
1665   a->N = mat->N   = oldmat->N;
1666 
1667   a->rstart       = oldmat->rstart;
1668   a->rend         = oldmat->rend;
1669   a->cstart       = oldmat->cstart;
1670   a->cend         = oldmat->cend;
1671   a->size         = oldmat->size;
1672   a->rank         = oldmat->rank;
1673   mat->insertmode = NOT_SET_VALUES;
1674   a->rowvalues    = 0;
1675   a->getrowactive = PETSC_FALSE;
1676 
1677   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1678   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1679   a->cowners = a->rowners + a->size + 2;
1680   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1681   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1682   if (oldmat->colmap) {
1683     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1684     PLogObjectMemory(mat,(a->N)*sizeof(int));
1685     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1686   } else a->colmap = 0;
1687   if (oldmat->garray) {
1688     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1689     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1690     PLogObjectMemory(mat,len*sizeof(int));
1691     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1692   } else a->garray = 0;
1693 
1694   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1695   PLogObjectParent(mat,a->lvec);
1696   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1697   PLogObjectParent(mat,a->Mvctx);
1698   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1699   PLogObjectParent(mat,a->A);
1700   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1701   PLogObjectParent(mat,a->B);
1702   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1703   if (flg) {
1704     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1705   }
1706   *newmat = mat;
1707   return 0;
1708 }
1709 
1710 #include "sys.h"
1711 
1712 #undef __FUNC__
1713 #define __FUNC__ "MatLoad_MPIAIJ"
1714 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1715 {
1716   Mat          A;
1717   int          i, nz, ierr, j,rstart, rend, fd;
1718   Scalar       *vals,*svals;
1719   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1720   MPI_Status   status;
1721   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1722   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1723   int          tag = ((PetscObject)viewer)->tag;
1724 
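  /*
     Process 0 reads the binary file: it scatters the row lengths, then ships
     each process its block of column indices (using the viewer's tag) and,
     in a second pass, its numerical values (using the matrix's tag).
  */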
1725   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1726   if (!rank) {
1727     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1728     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1729     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1730   }
1731 
1732   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1733   M = header[1]; N = header[2];
1734   /* determine ownership of all rows */
1735   m = M/size + ((M % size) > rank);
1736   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1737   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1738   rowners[0] = 0;
1739   for ( i=2; i<=size; i++ ) {
1740     rowners[i] += rowners[i-1];
1741   }
1742   rstart = rowners[rank];
1743   rend   = rowners[rank+1];
1744 
1745   /* distribute row lengths to all processors */
1746   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1747   offlens = ourlens + (rend-rstart);
1748   if (!rank) {
1749     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1750     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1751     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1752     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1753     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1754     PetscFree(sndcounts);
1755   }
1756   else {
1757     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1758   }
1759 
1760   if (!rank) {
1761     /* calculate the number of nonzeros on each processor */
1762     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1763     PetscMemzero(procsnz,size*sizeof(int));
1764     for ( i=0; i<size; i++ ) {
1765       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1766         procsnz[i] += rowlengths[j];
1767       }
1768     }
1769     PetscFree(rowlengths);
1770 
1771     /* determine max buffer needed and allocate it */
1772     maxnz = 0;
1773     for ( i=0; i<size; i++ ) {
1774       maxnz = PetscMax(maxnz,procsnz[i]);
1775     }
1776     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1777 
1778     /* read in my part of the matrix column indices  */
1779     nz = procsnz[0];
1780     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1781     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1782 
1783     /* read in everyone else's part and ship it off */
1784     for ( i=1; i<size; i++ ) {
1785       nz = procsnz[i];
1786       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1787       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1788     }
1789     PetscFree(cols);
1790   }
1791   else {
1792     /* determine buffer space needed for message */
1793     nz = 0;
1794     for ( i=0; i<m; i++ ) {
1795       nz += ourlens[i];
1796     }
1797     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1798 
1799     /* receive the message of column indices */
1800     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1801     MPI_Get_count(&status,MPI_INT,&maxnz);
1802     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1803   }
1804 
1805   /* loop over local rows, determining the number of off-diagonal entries */
1806   PetscMemzero(offlens,m*sizeof(int));
1807   jj = 0;
1808   for ( i=0; i<m; i++ ) {
1809     for ( j=0; j<ourlens[i]; j++ ) {
1810       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1811       jj++;
1812     }
1813   }
1814 
1815   /* create our matrix: ourlens temporarily holds only the diagonal-block lengths for the preallocation; the full lengths are restored after the create */
1816   for ( i=0; i<m; i++ ) {
1817     ourlens[i] -= offlens[i];
1818   }
1819   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1820   A = *newmat;
1821   MatSetOption(A,MAT_COLUMNS_SORTED);
1822   for ( i=0; i<m; i++ ) {
1823     ourlens[i] += offlens[i];
1824   }
1825 
1826   if (!rank) {
1827     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1828 
1829     /* read in my part of the matrix numerical values  */
1830     nz = procsnz[0];
1831     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1832 
1833     /* insert into matrix */
1834     jj      = rstart;
1835     smycols = mycols;
1836     svals   = vals;
1837     for ( i=0; i<m; i++ ) {
1838       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1839       smycols += ourlens[i];
1840       svals   += ourlens[i];
1841       jj++;
1842     }
1843 
1844     /* read in the other processors' parts and ship them out */
1845     for ( i=1; i<size; i++ ) {
1846       nz = procsnz[i];
1847       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1848       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1849     }
1850     PetscFree(procsnz);
1851   }
1852   else {
1853     /* receive numeric values */
1854     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1855 
1856     /* receive the message of values */
1857     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1858     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1859     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1860 
1861     /* insert into matrix */
1862     jj      = rstart;
1863     smycols = mycols;
1864     svals   = vals;
1865     for ( i=0; i<m; i++ ) {
1866       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1867       smycols += ourlens[i];
1868       svals   += ourlens[i];
1869       jj++;
1870     }
1871   }
1872   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1873 
1874   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1875   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1876   return 0;
1877 }
1878