xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision abffa341d06f511e7d690deae555d0d92cf30f96)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.196 1997/03/29 00:59:34 balay Exp balay $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global number of columns in the matrix.
14 */
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
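/*
   Illustrative sketch (not called anywhere in the library; the routine name is
   made up): how the colmap built above is used.  colmap[] stores the local
   off-diagonal column index plus one, so an entry of 0 means "this global
   column has no entry in the off-diagonal block B on this process"; a lookup
   therefore subtracts one and treats a negative result as "not present".
*/
static int ExampleColmapLookup_MPIAIJ(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (!aij->colmap) {
    ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
  }
  *lcol = aij->colmap[gcol] - 1;  /* < 0 means gcol is not in B on this process */
  return 0;
}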
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = aimax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66       for ( _i=0; _i<nrow; _i++ ) { \
67         if (rp[_i] > col1) break; \
68         if (rp[_i] == col1) { \
69           if (addv == ADD_VALUES) ap[_i] += value;   \
70           else                  ap[_i] = value; \
71           goto a_noinsert; \
72         } \
73       }  \
74       if (nonew)  goto a_noinsert; \
75       if (nrow >= rmax) { \
76         /* there is no extra room in row, therefore enlarge */ \
77         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
78         Scalar *new_a; \
79  \
80         /* malloc new storage space */ \
81         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
82         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
83         new_j   = (int *) (new_a + new_nz); \
84         new_i   = new_j + new_nz; \
85  \
86         /* copy over old data into new slots */ \
87         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
88         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
89         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
90         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
91         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
92                                                            len*sizeof(int)); \
93         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
94         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
95                                                            len*sizeof(Scalar));  \
96         /* free up old matrix storage */ \
97  \
98         PetscFree(a->a);  \
99         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
100         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
101         a->singlemalloc = 1; \
102  \
103         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
104         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
105         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
106         a->maxnz += CHUNKSIZE; \
107         a->reallocs++; \
108       } \
109       N = nrow++ - 1; a->nz++; \
110       /* shift up all the later entries in this row */ \
111       for ( ii=N; ii>=_i; ii-- ) { \
112         rp[ii+1] = rp[ii]; \
113         ap[ii+1] = ap[ii]; \
114       } \
115       rp[_i] = col1;  \
116       ap[_i] = value;  \
117       a_noinsert: ; \
118       ailen[row] = nrow; \
119 }
120 
121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
122 { \
123  \
124     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
125     rmax = bimax[row]; nrow = bilen[row];  \
126     col1 = col - shift; \
127      \
128       for ( _i=0; _i<nrow; _i++ ) { \
129         if (rp[_i] > col1) break; \
130         if (rp[_i] == col1) { \
131           if (addv == ADD_VALUES) ap[_i] += value;   \
132           else                  ap[_i] = value; \
133           goto b_noinsert; \
134         } \
135       }  \
136       if (nonew)  goto b_noinsert; \
137       if (nrow >= rmax) { \
138         /* there is no extra room in row, therefore enlarge */ \
139         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
140         Scalar *new_a; \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
144         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
145         new_j   = (int *) (new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
150         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(int)); \
155         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
156         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(Scalar));  \
158         /* free up old matrix storage */ \
159  \
160         PetscFree(b->a);  \
161         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
162         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
163         b->singlemalloc = 1; \
164  \
165         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
166         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
167         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
168         b->maxnz += CHUNKSIZE; \
169         b->reallocs++; \
170       } \
171       N = nrow++ - 1; b->nz++; \
172       /* shift up all the later entries in this row */ \
173       for ( ii=N; ii>=_i; ii-- ) { \
174         rp[ii+1] = rp[ii]; \
175         ap[ii+1] = ap[ii]; \
176       } \
177       rp[_i] = col1;  \
178       ap[_i] = value;  \
179       b_noinsert: ; \
180       bilen[row] = nrow; \
181 }
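/*
   A minimal standalone sketch (for exposition only; the library uses the two
   macros above) of the insertion strategy they implement: search the sorted
   row for the column, add to or overwrite the value if it is already present,
   otherwise shift the tail of the row up by one slot and insert.  This sketch
   assumes rp and ap have room for one more entry; the macros instead grow the
   matrix storage by CHUNKSIZE when nrow reaches rmax.
*/
static int ExampleInsertIntoSortedRow(int *rp,Scalar *ap,int *nrow,int col,
                                      Scalar value,InsertMode addv)
{
  int i,k;

  for ( i=0; i<*nrow; i++ ) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i] = value;
      return 0;
    }
  }
  /* not found: shift the later entries up and insert at position i */
  for ( k=*nrow-1; k>=i; k-- ) {
    rp[k+1] = rp[k];
    ap[k+1] = ap[k];
  }
  rp[i] = col; ap[i] = value; (*nrow)++;
  return 0;
}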
182 
183 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
184 #undef __FUNC__
185 #define __FUNC__ "MatSetValues_MPIAIJ"
186 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
189   Scalar     value;
190   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
191   int        cstart = aij->cstart, cend = aij->cend,row,col;
192   int        roworiented = aij->roworiented;
193 
194   /* Some variables required by the macros above */
195   Mat        A = aij->A;
196   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
197   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
198   Scalar     *aa = a->a;
199 
200   Mat        B = aij->B;
201   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
202   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
203   Scalar     *ba = b->a;
204 
205   int        *rp,ii,nrow,_i,rmax, N, col1;
206   int        nonew = a->nonew,shift = a->indexshift;
207   Scalar     *ap;
208 
209   for ( i=0; i<m; i++ ) {
210 #if defined(PETSC_BOPT_g)
211     if (im[i] < 0) SETERRQ(1,0,"Negative row");
212     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
213 #endif
214     if (im[i] >= rstart && im[i] < rend) {
215       row = im[i] - rstart;
216       for ( j=0; j<n; j++ ) {
217         if (in[j] >= cstart && in[j] < cend){
218           col = in[j] - cstart;
219           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
220           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
221           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
222         }
223 #if defined(PETSC_BOPT_g)
224         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
225         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
226 #endif
227         else {
228           if (mat->was_assembled) {
229             if (!aij->colmap) {
230               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
231             }
232             col = aij->colmap[in[j]] - 1;
233             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
234               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
235               col =  in[j];
236             }
237           }
238           else col = in[j];
239           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
240           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
241           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
242         }
243       }
244     }
245     else {
246       if (roworiented && !aij->donotstash) {
247         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
248       }
249       else {
250         if (!aij->donotstash) {
251           row = im[i];
252           for ( j=0; j<n; j++ ) {
253             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
254           }
255         }
256       }
257     }
258   }
259   return 0;
260 }
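/*
   Usage sketch (illustrative only; the routine name and value are made up):
   callers pass global row and column indices to MatSetValues() regardless of
   ownership.  Rows owned by this process are inserted directly into the
   diagonal (A) or off-diagonal (B) block by the macros above; rows owned by
   other processes are stashed and shipped to their owners during
   MatAssemblyBegin()/MatAssemblyEnd().
*/
static int ExampleSetValueAndAssemble(Mat mat,int grow,int gcol,Scalar v)
{
  int ierr;

  ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,ADD_VALUES); CHKERRQ(ierr);
  /* ... usually many more MatSetValues() calls ... */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}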
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatGetValues_MPIAIJ"
264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
265 {
266   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
267   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
268   int        cstart = aij->cstart, cend = aij->cend,row,col;
269 
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     }
294     else {
295       SETERRQ(1,0,"Only local values currently supported");
296     }
297   }
298   return 0;
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
315   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
316   if (addv == (ADD_VALUES|INSERT_VALUES)) {
317     SETERRQ(1,0,"Some processors inserted others added");
318   }
319   mat->insertmode = addv; /* in case this processor had no cache */
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
325   for ( i=0; i<aij->stash.n; i++ ) {
326     idx = aij->stash.idx[i];
327     for ( j=0; j<size; j++ ) {
328       if (idx >= owners[j] && idx < owners[j+1]) {
329         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
330       }
331     }
332   }
333   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
334 
335   /* inform other processors of number of messages and max length*/
336   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
337   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
338   nreceives = work[rank];
339   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
340   nmax = work[rank];
341   PetscFree(work);
342 
343   /* post receives:
344        1) each message will consist of ordered triples
345      (global row index, global column index, value); we store the indices
346      as Scalars (doubles) to simplify the message passing.
347        2) since we don't know how long each individual message is, we
348      allocate the largest needed buffer for each receive. Potentially
349      this is a lot of wasted space.
350 
351 
352        This could be done better.
353   */
354   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
355   CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
357   CHKPTRQ(recv_waits);
358   for ( i=0; i<nreceives; i++ ) {
359     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
360               comm,recv_waits+i);
361   }
362 
363   /* do sends:
364       1) starts[i] gives the starting index in svalues for stuff going to
365          the ith processor
366   */
367   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
368   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
369   CHKPTRQ(send_waits);
370   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
371   starts[0] = 0;
372   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
373   for ( i=0; i<aij->stash.n; i++ ) {
374     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
375     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
376     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
377   }
378   PetscFree(owner);
379   starts[0] = 0;
380   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
381   count = 0;
382   for ( i=0; i<size; i++ ) {
383     if (procs[i]) {
384       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
385                 comm,send_waits+count++);
386     }
387   }
388   PetscFree(starts); PetscFree(nprocs);
389 
390   /* Free cache space */
391   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
392   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
393 
394   aij->svalues    = svalues;    aij->rvalues    = rvalues;
395   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
396   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
397   aij->rmax       = nmax;
398 
399   return 0;
400 }
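/*
   Sketch (for exposition only; not used by the library) of the message-sizing
   handshake performed above: procs[j] is 1 if this process will send to
   process j, and nprocs[j] is how many stashed entries go there.  Summing
   procs[] over all processes tells each process how many messages it will
   receive; the maximum of nprocs[] bounds the length of any single message,
   so every receive buffer can be allocated up front.
*/
static int ExampleCountMessages(MPI_Comm comm,int size,int rank,int *procs,
                                int *nprocs,int *nreceives,int *nmax)
{
  int *work;

  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  *nreceives = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  *nmax = work[rank];
  PetscFree(work);
  return 0;
}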
401 extern int MatSetUpMultiply_MPIAIJ(Mat);
402 
403 #undef __FUNC__
404 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
405 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
406 {
407   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
408   MPI_Status  *send_status,recv_status;
409   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
410   int         row,col,other_disassembled;
411   Scalar      *values,val;
412   InsertMode  addv = mat->insertmode;
413 
414   /*  wait on receives */
415   while (count) {
416     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
417     /* unpack receives into our local space */
418     values = aij->rvalues + 3*imdex*aij->rmax;
419     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
420     n = n/3;
421     for ( i=0; i<n; i++ ) {
422       row = (int) PetscReal(values[3*i]) - aij->rstart;
423       col = (int) PetscReal(values[3*i+1]);
424       val = values[3*i+2];
425       if (col >= aij->cstart && col < aij->cend) {
426         col -= aij->cstart;
427         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
428       }
429       else {
430         if (mat->was_assembled || mat->assembled) {
431           if (!aij->colmap) {
432             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
433           }
434           col = aij->colmap[col] - 1;
435           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
436             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
437             col = (int) PetscReal(values[3*i+1]);
438           }
439         }
440         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
441       }
442     }
443     count--;
444   }
445   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
446 
447   /* wait on sends */
448   if (aij->nsends) {
449     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
450     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
451     PetscFree(send_status);
452   }
453   PetscFree(aij->send_waits); PetscFree(aij->svalues);
454 
455   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
456   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
457 
458   /* determine if any processor has disassembled; if so, we must
459      also disassemble ourselves, so that we may reassemble. */
460   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
461   if (mat->was_assembled && !other_disassembled) {
462     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
463   }
464 
465   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
466     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
467   }
468   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
469   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
470 
471   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
472   return 0;
473 }
474 
475 #undef __FUNC__
476 #define __FUNC__ "MatZeroEntries_MPIAIJ"
477 int MatZeroEntries_MPIAIJ(Mat A)
478 {
479   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
480   int        ierr;
481   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
482   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
483   return 0;
484 }
485 
486 /* the code does not do the diagonal entries correctly unless the
487    matrix is square and the column and row ownerships are identical.
488    This is a BUG. The only way to fix it seems to be to access
489    aij->A and aij->B directly and not through the MatZeroRows()
490    routine.
491 */
492 #undef __FUNC__
493 #define __FUNC__ "MatZeroRows_MPIAIJ"
494 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
495 {
496   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
497   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
498   int            *procs,*nprocs,j,found,idx,nsends,*work;
499   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
500   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
501   int            *lens,imdex,*lrows,*values;
502   MPI_Comm       comm = A->comm;
503   MPI_Request    *send_waits,*recv_waits;
504   MPI_Status     recv_status,*send_status;
505   IS             istmp;
506 
507   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
508   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
509 
510   /*  first count number of contributors to each processor */
511   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
512   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
513   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
514   for ( i=0; i<N; i++ ) {
515     idx = rows[i];
516     found = 0;
517     for ( j=0; j<size; j++ ) {
518       if (idx >= owners[j] && idx < owners[j+1]) {
519         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
520       }
521     }
522     if (!found) SETERRQ(1,0,"Index out of range");
523   }
524   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
525 
526   /* inform other processors of number of messages and max length*/
527   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
528   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
529   nrecvs = work[rank];
530   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
531   nmax = work[rank];
532   PetscFree(work);
533 
534   /* post receives:   */
535   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
536   CHKPTRQ(rvalues);
537   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
538   CHKPTRQ(recv_waits);
539   for ( i=0; i<nrecvs; i++ ) {
540     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
541   }
542 
543   /* do sends:
544       1) starts[i] gives the starting index in svalues for stuff going to
545          the ith processor
546   */
547   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
548   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
549   CHKPTRQ(send_waits);
550   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
551   starts[0] = 0;
552   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
553   for ( i=0; i<N; i++ ) {
554     svalues[starts[owner[i]]++] = rows[i];
555   }
556   ISRestoreIndices(is,&rows);
557 
558   starts[0] = 0;
559   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
560   count = 0;
561   for ( i=0; i<size; i++ ) {
562     if (procs[i]) {
563       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
564     }
565   }
566   PetscFree(starts);
567 
568   base = owners[rank];
569 
570   /*  wait on receives */
571   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
572   source = lens + nrecvs;
573   count  = nrecvs; slen = 0;
574   while (count) {
575     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
576     /* unpack receives into our local space */
577     MPI_Get_count(&recv_status,MPI_INT,&n);
578     source[imdex]  = recv_status.MPI_SOURCE;
579     lens[imdex]  = n;
580     slen += n;
581     count--;
582   }
583   PetscFree(recv_waits);
584 
585   /* move the received row indices into the list of local rows to zero */
586   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
587   count = 0;
588   for ( i=0; i<nrecvs; i++ ) {
589     values = rvalues + i*nmax;
590     for ( j=0; j<lens[i]; j++ ) {
591       lrows[count++] = values[j] - base;
592     }
593   }
594   PetscFree(rvalues); PetscFree(lens);
595   PetscFree(owner); PetscFree(nprocs);
596 
597   /* actually zap the local rows */
598   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
599   PLogObjectParent(A,istmp);
600   PetscFree(lrows);
601   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
602   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
603   ierr = ISDestroy(istmp); CHKERRQ(ierr);
604 
605   /* wait on sends */
606   if (nsends) {
607     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
608     CHKPTRQ(send_status);
609     MPI_Waitall(nsends,send_waits,send_status);
610     PetscFree(send_status);
611   }
612   PetscFree(send_waits); PetscFree(svalues);
613 
614   return 0;
615 }
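/*
   Usage sketch (illustrative only; the row numbers are made up): zeroing a set
   of rows given by global indices.  Each process may list any global rows; the
   routine above ships every row index to its owner before zeroing, and puts
   *diag on the diagonal of each zeroed row when diag is non-null (see the
   caveat above about square matrices and matching row/column ownership).
*/
static int ExampleZeroTwoRows(Mat A)
{
  IS     is;
  int    rows[2],ierr;
  Scalar one = 1.0;

  rows[0] = 0; rows[1] = 5;                          /* arbitrary global rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);      /* place 1.0 on the diagonal */
  ierr = ISDestroy(is); CHKERRQ(ierr);
  return 0;
}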
616 
617 #undef __FUNC__
618 #define __FUNC__ "MatMult_MPIAIJ"
619 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
620 {
621   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
622   int        ierr,nt;
623 
624   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
625   if (nt != a->n) {
626     SETERRQ(1,0,"Incompatible partition of A and xx");
627   }
628   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
629   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
630   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
631   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
632   return 0;
633 }
634 
635 #undef __FUNC__
636 #define __FUNC__ "MatMultAdd_MPIAIJ"
637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
638 {
639   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
640   int        ierr;
641   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
642   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
643   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
644   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
645   return 0;
646 }
647 
648 #undef __FUNC__
649 #define __FUNC__ "MatMultTrans_MPIAIJ"
650 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
651 {
652   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
653   int        ierr;
654 
655   /* do nondiagonal part */
656   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
657   /* send it on its way */
658   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
659   /* do local part */
660   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
661   /* receive remote parts: note this assumes the values are not actually */
662   /* inserted in yy until the next line, which is true for my implementation*/
663   /* but may not always be true. */
664   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
665   return 0;
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
673   int        ierr;
674 
675   /* do nondiagonal part */
676   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
677   /* send it on its way */
678   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
679   /* do local part */
680   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
681   /* receive remote parts: note this assumes the values are not actually */
682   /* inserted in zz until the next line, which is true for my implementation*/
683   /* but may not always be true. */
684   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
685   return 0;
686 }
687 
688 /*
689   This only works correctly for square matrices where the subblock A->A is the
690    diagonal block
691 */
692 #undef __FUNC__
693 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
694 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
695 {
696   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
697   if (a->M != a->N)
698     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
699   if (a->rstart != a->cstart || a->rend != a->cend) {
700     SETERRQ(1,0,"row partition must equal col partition");  }
701   return MatGetDiagonal(a->A,v);
702 }
703 
704 #undef __FUNC__
705 #define __FUNC__ "MatScale_MPIAIJ"
706 int MatScale_MPIAIJ(Scalar *aa,Mat A)
707 {
708   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
709   int        ierr;
710   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
711   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
712   return 0;
713 }
714 
715 #undef __FUNC__
716 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
717 int MatDestroy_MPIAIJ(PetscObject obj)
718 {
719   Mat        mat = (Mat) obj;
720   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
721   int        ierr;
722 #if defined(PETSC_LOG)
723   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
724 #endif
725   PetscFree(aij->rowners);
726   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
727   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
728   if (aij->colmap) PetscFree(aij->colmap);
729   if (aij->garray) PetscFree(aij->garray);
730   if (aij->lvec)   VecDestroy(aij->lvec);
731   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
732   if (aij->rowvalues) PetscFree(aij->rowvalues);
733   PetscFree(aij);
734   if (mat->mapping) {
735     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
736   }
737   PLogObjectDestroy(mat);
738   PetscHeaderDestroy(mat);
739   return 0;
740 }
741 
742 #undef __FUNC__
743 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
744 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
745 {
746   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
747   int         ierr;
748 
749   if (aij->size == 1) {
750     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
751   }
752   else SETERRQ(1,0,"Only uniprocessor output supported");
753   return 0;
754 }
755 
756 #undef __FUNC__
757 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
758 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
759 {
760   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
761   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
762   int         ierr, format,shift = C->indexshift,rank;
763   FILE        *fd;
764   ViewerType  vtype;
765 
766   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
767   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
768     ierr = ViewerGetFormat(viewer,&format);
769     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
770       MatInfo info;
771       int     flg;
772       MPI_Comm_rank(mat->comm,&rank);
773       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
774       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
775       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
776       PetscSequentialPhaseBegin(mat->comm,1);
777       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
778          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
779       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
780          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
781       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
782       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
783       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
784       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
785       fflush(fd);
786       PetscSequentialPhaseEnd(mat->comm,1);
787       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
788       return 0;
789     }
790     else if (format == VIEWER_FORMAT_ASCII_INFO) {
791       return 0;
792     }
793   }
794 
795   if (vtype == DRAW_VIEWER) {
796     Draw       draw;
797     PetscTruth isnull;
798     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
799     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
800   }
801 
802   if (vtype == ASCII_FILE_VIEWER) {
803     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
804     PetscSequentialPhaseBegin(mat->comm,1);
805     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
806            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
807            aij->cend);
808     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
809     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
810     fflush(fd);
811     PetscSequentialPhaseEnd(mat->comm,1);
812   }
813   else {
814     int size = aij->size;
815     rank = aij->rank;
816     if (size == 1) {
817       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
818     }
819     else {
820       /* assemble the entire matrix onto first processor. */
821       Mat         A;
822       Mat_SeqAIJ *Aloc;
823       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
824       Scalar      *a;
825 
826       if (!rank) {
827         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
828                CHKERRQ(ierr);
829       }
830       else {
831         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
832                CHKERRQ(ierr);
833       }
834       PLogObjectParent(mat,A);
835 
836       /* copy over the A part */
837       Aloc = (Mat_SeqAIJ*) aij->A->data;
838       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
839       row = aij->rstart;
840       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
841       for ( i=0; i<m; i++ ) {
842         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
843         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
844       }
845       aj = Aloc->j;
846       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
847 
848       /* copy over the B part */
849       Aloc = (Mat_SeqAIJ*) aij->B->data;
850       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
851       row = aij->rstart;
852       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
853       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
854       for ( i=0; i<m; i++ ) {
855         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
856         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
857       }
858       PetscFree(ct);
859       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
860       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
861       if (!rank) {
862         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
863       }
864       ierr = MatDestroy(A); CHKERRQ(ierr);
865     }
866   }
867   return 0;
868 }
869 
870 #undef __FUNC__
871 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
872 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
873 {
874   Mat         mat = (Mat) obj;
875   int         ierr;
876   ViewerType  vtype;
877 
878   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
879   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
880       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
881     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
882   }
883   else if (vtype == BINARY_FILE_VIEWER) {
884     return MatView_MPIAIJ_Binary(mat,viewer);
885   }
886   return 0;
887 }
888 
889 /*
890     This has to provide several versions:
891 
892      2) a) use only local smoothing, updating outer values only once.
893         b) local smoothing, updating outer values each inner iteration.
894      3) coloring, updating outer values between colors.
895 */
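/*
   The update applied in each sweep below is standard (local) SOR.  Assuming
   SPARSEDENSEMDOT subtracts the sparse dot product from sum, so that after the
   two macro calls sum = b_i - sum_j a_ij x_j (including the diagonal term),
   the line

       x[i] = (1 - omega)*x[i] + omega*(sum + a_ii*x[i])/(a_ii + fshift)

   is the usual relaxation

       x_i <- (1 - omega) x_i + omega (b_i - sum_{j != i} a_ij x_j)/(a_ii + fshift)
*/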
896 #undef __FUNC__
897 #define __FUNC__ "MatRelax_MPIAIJ"
898 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
899                            double fshift,int its,Vec xx)
900 {
901   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
902   Mat        AA = mat->A, BB = mat->B;
903   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
904   Scalar     *b,*x,*xs,*ls,d,*v,sum;
905   int        ierr,*idx, *diag;
906   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
907 
908   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
909   xs = x + shift; /* shift by one for index start of 1 */
910   ls = ls + shift;
911   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
912   diag = A->diag;
913   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
914     if (flag & SOR_ZERO_INITIAL_GUESS) {
915       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
916     }
917     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
918     CHKERRQ(ierr);
919     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
920     CHKERRQ(ierr);
921     while (its--) {
922       /* go down through the rows */
923       for ( i=0; i<m; i++ ) {
924         n    = A->i[i+1] - A->i[i];
925         idx  = A->j + A->i[i] + shift;
926         v    = A->a + A->i[i] + shift;
927         sum  = b[i];
928         SPARSEDENSEMDOT(sum,xs,v,idx,n);
929         d    = fshift + A->a[diag[i]+shift];
930         n    = B->i[i+1] - B->i[i];
931         idx  = B->j + B->i[i] + shift;
932         v    = B->a + B->i[i] + shift;
933         SPARSEDENSEMDOT(sum,ls,v,idx,n);
934         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
935       }
936       /* come up through the rows */
937       for ( i=m-1; i>-1; i-- ) {
938         n    = A->i[i+1] - A->i[i];
939         idx  = A->j + A->i[i] + shift;
940         v    = A->a + A->i[i] + shift;
941         sum  = b[i];
942         SPARSEDENSEMDOT(sum,xs,v,idx,n);
943         d    = fshift + A->a[diag[i]+shift];
944         n    = B->i[i+1] - B->i[i];
945         idx  = B->j + B->i[i] + shift;
946         v    = B->a + B->i[i] + shift;
947         SPARSEDENSEMDOT(sum,ls,v,idx,n);
948         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
949       }
950     }
951   }
952   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
953     if (flag & SOR_ZERO_INITIAL_GUESS) {
954       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
955     }
956     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
957     CHKERRQ(ierr);
958     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
959     CHKERRQ(ierr);
960     while (its--) {
961       for ( i=0; i<m; i++ ) {
962         n    = A->i[i+1] - A->i[i];
963         idx  = A->j + A->i[i] + shift;
964         v    = A->a + A->i[i] + shift;
965         sum  = b[i];
966         SPARSEDENSEMDOT(sum,xs,v,idx,n);
967         d    = fshift + A->a[diag[i]+shift];
968         n    = B->i[i+1] - B->i[i];
969         idx  = B->j + B->i[i] + shift;
970         v    = B->a + B->i[i] + shift;
971         SPARSEDENSEMDOT(sum,ls,v,idx,n);
972         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
973       }
974     }
975   }
976   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
977     if (flag & SOR_ZERO_INITIAL_GUESS) {
978       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
979     }
980     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
981                             mat->Mvctx); CHKERRQ(ierr);
982     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
983                             mat->Mvctx); CHKERRQ(ierr);
984     while (its--) {
985       for ( i=m-1; i>-1; i-- ) {
986         n    = A->i[i+1] - A->i[i];
987         idx  = A->j + A->i[i] + shift;
988         v    = A->a + A->i[i] + shift;
989         sum  = b[i];
990         SPARSEDENSEMDOT(sum,xs,v,idx,n);
991         d    = fshift + A->a[diag[i]+shift];
992         n    = B->i[i+1] - B->i[i];
993         idx  = B->j + B->i[i] + shift;
994         v    = B->a + B->i[i] + shift;
995         SPARSEDENSEMDOT(sum,ls,v,idx,n);
996         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
997       }
998     }
999   }
1000   else {
1001     SETERRQ(1,0,"Option not supported");
1002   }
1003   return 0;
1004 }
1005 
1006 #undef __FUNC__
1007 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1008 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1009 {
1010   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1011   Mat        A = mat->A, B = mat->B;
1012   int        ierr;
1013   double     isend[5], irecv[5];
1014 
1015   info->rows_global    = (double)mat->M;
1016   info->columns_global = (double)mat->N;
1017   info->rows_local     = (double)mat->m;
1018   info->columns_local  = (double)mat->N;
1019   info->block_size     = 1.0;
1020   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1021   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1022   isend[3] = info->memory;  isend[4] = info->mallocs;
1023   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1024   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1025   isend[3] += info->memory;  isend[4] += info->mallocs;
1026   if (flag == MAT_LOCAL) {
1027     info->nz_used      = isend[0];
1028     info->nz_allocated = isend[1];
1029     info->nz_unneeded  = isend[2];
1030     info->memory       = isend[3];
1031     info->mallocs      = isend[4];
1032   } else if (flag == MAT_GLOBAL_MAX) {
1033     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1034     info->nz_used      = irecv[0];
1035     info->nz_allocated = irecv[1];
1036     info->nz_unneeded  = irecv[2];
1037     info->memory       = irecv[3];
1038     info->mallocs      = irecv[4];
1039   } else if (flag == MAT_GLOBAL_SUM) {
1040     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1041     info->nz_used      = irecv[0];
1042     info->nz_allocated = irecv[1];
1043     info->nz_unneeded  = irecv[2];
1044     info->memory       = irecv[3];
1045     info->mallocs      = irecv[4];
1046   }
1047   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1048   info->fill_ratio_needed = 0;
1049   info->factor_mallocs    = 0;
1050 
1051   return 0;
1052 }
1053 
1054 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1055 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1056 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1057 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1058 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1059 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1060 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1061 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1062 
1063 #undef __FUNC__
1064 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1065 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1066 {
1067   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1068 
1069   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1070       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1071       op == MAT_COLUMNS_UNSORTED ||
1072       op == MAT_COLUMNS_SORTED ||
1073       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1074         MatSetOption(a->A,op);
1075         MatSetOption(a->B,op);
1076   } else if (op == MAT_ROW_ORIENTED) {
1077     a->roworiented = 1;
1078     MatSetOption(a->A,op);
1079     MatSetOption(a->B,op);
1080   } else if (op == MAT_ROWS_SORTED ||
1081              op == MAT_ROWS_UNSORTED ||
1082              op == MAT_SYMMETRIC ||
1083              op == MAT_STRUCTURALLY_SYMMETRIC ||
1084              op == MAT_YES_NEW_DIAGONALS)
1085     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1086   else if (op == MAT_COLUMN_ORIENTED) {
1087     a->roworiented = 0;
1088     MatSetOption(a->A,op);
1089     MatSetOption(a->B,op);
1090   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
1091     a->donotstash = 1;
1092   } else if (op == MAT_NO_NEW_DIAGONALS)
1093     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1094   else
1095     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1096   return 0;
1097 }
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1101 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1104   *m = mat->M; *n = mat->N;
1105   return 0;
1106 }
1107 
1108 #undef __FUNC__
1109 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1110 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1111 {
1112   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1113   *m = mat->m; *n = mat->N;
1114   return 0;
1115 }
1116 
1117 #undef __FUNC__
1118 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1119 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1120 {
1121   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1122   *m = mat->rstart; *n = mat->rend;
1123   return 0;
1124 }
1125 
1126 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1127 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1128 
1129 #undef __FUNC__
1130 #define __FUNC__ "MatGetRow_MPIAIJ"
1131 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1132 {
1133   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1134   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1135   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1136   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1137   int        *cmap, *idx_p;
1138 
1139   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1140   mat->getrowactive = PETSC_TRUE;
1141 
1142   if (!mat->rowvalues && (idx || v)) {
1143     /*
1144         allocate enough space to hold information from the longest row.
1145     */
1146     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1147     int     max = 1,m = mat->m,tmp;
1148     for ( i=0; i<m; i++ ) {
1149       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1150       if (max < tmp) { max = tmp; }
1151     }
1152     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1153     CHKPTRQ(mat->rowvalues);
1154     mat->rowindices = (int *) (mat->rowvalues + max);
1155   }
1156 
1157   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1158   lrow = row - rstart;
1159 
1160   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1161   if (!v)   {pvA = 0; pvB = 0;}
1162   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1163   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1164   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1165   nztot = nzA + nzB;
1166 
1167   cmap  = mat->garray;
1168   if (v  || idx) {
1169     if (nztot) {
1170       /* Sort by increasing column numbers, assuming A and B already sorted */
1171       int imark = -1;
1172       if (v) {
1173         *v = v_p = mat->rowvalues;
1174         for ( i=0; i<nzB; i++ ) {
1175           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1176           else break;
1177         }
1178         imark = i;
1179         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1180         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1181       }
1182       if (idx) {
1183         *idx = idx_p = mat->rowindices;
1184         if (imark > -1) {
1185           for ( i=0; i<imark; i++ ) {
1186             idx_p[i] = cmap[cworkB[i]];
1187           }
1188         } else {
1189           for ( i=0; i<nzB; i++ ) {
1190             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1191             else break;
1192           }
1193           imark = i;
1194         }
1195         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1196         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1197       }
1198     }
1199     else {
1200       if (idx) *idx = 0;
1201       if (v)   *v   = 0;
1202     }
1203   }
1204   *nz = nztot;
1205   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1206   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1207   return 0;
1208 }
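/*
   Usage sketch (illustrative only; the routine name is made up): looping over
   the locally owned rows with MatGetRow()/MatRestoreRow().  Only local rows
   may be requested, and only one row may be active at a time (enforced by the
   getrowactive flag above).
*/
static int ExampleCountLocalNonzeros(Mat A,int *nztotal)
{
  int    ierr,row,rstart,rend,nz,*cols;
  Scalar *vals;

  *nztotal = 0;
  ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
  for ( row=rstart; row<rend; row++ ) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
    *nztotal += nz;
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
  }
  return 0;
}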
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1212 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1213 {
1214   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1215   if (aij->getrowactive == PETSC_FALSE) {
1216     SETERRQ(1,0,"MatGetRow not called");
1217   }
1218   aij->getrowactive = PETSC_FALSE;
1219   return 0;
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatNorm_MPIAIJ"
1224 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1225 {
1226   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1227   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1228   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1229   double     sum = 0.0;
1230   Scalar     *v;
1231 
1232   if (aij->size == 1) {
1233     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1234   } else {
1235     if (type == NORM_FROBENIUS) {
1236       v = amat->a;
1237       for (i=0; i<amat->nz; i++ ) {
1238 #if defined(PETSC_COMPLEX)
1239         sum += real(conj(*v)*(*v)); v++;
1240 #else
1241         sum += (*v)*(*v); v++;
1242 #endif
1243       }
1244       v = bmat->a;
1245       for (i=0; i<bmat->nz; i++ ) {
1246 #if defined(PETSC_COMPLEX)
1247         sum += real(conj(*v)*(*v)); v++;
1248 #else
1249         sum += (*v)*(*v); v++;
1250 #endif
1251       }
1252       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1253       *norm = sqrt(*norm);
1254     }
1255     else if (type == NORM_1) { /* max column norm */
1256       double *tmp, *tmp2;
1257       int    *jj, *garray = aij->garray;
1258       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1259       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1260       PetscMemzero(tmp,aij->N*sizeof(double));
1261       *norm = 0.0;
1262       v = amat->a; jj = amat->j;
1263       for ( j=0; j<amat->nz; j++ ) {
1264         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1265       }
1266       v = bmat->a; jj = bmat->j;
1267       for ( j=0; j<bmat->nz; j++ ) {
1268         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1269       }
1270       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1271       for ( j=0; j<aij->N; j++ ) {
1272         if (tmp2[j] > *norm) *norm = tmp2[j];
1273       }
1274       PetscFree(tmp); PetscFree(tmp2);
1275     }
1276     else if (type == NORM_INFINITY) { /* max row norm */
1277       double ntemp = 0.0;
1278       for ( j=0; j<amat->m; j++ ) {
1279         v = amat->a + amat->i[j] + shift;
1280         sum = 0.0;
1281         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1282           sum += PetscAbsScalar(*v); v++;
1283         }
1284         v = bmat->a + bmat->i[j] + shift;
1285         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1286           sum += PetscAbsScalar(*v); v++;
1287         }
1288         if (sum > ntemp) ntemp = sum;
1289       }
1290       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1291     }
1292     else {
1293       SETERRQ(1,0,"No support for two norm");
1294     }
1295   }
1296   return 0;
1297 }
1298 
1299 #undef __FUNC__
1300 #define __FUNC__ "MatTranspose_MPIAIJ"
1301 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1302 {
1303   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1304   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1305   int        ierr,shift = Aloc->indexshift;
1306   Mat        B;
1307   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1308   Scalar     *array;
1309 
1310   if (matout == PETSC_NULL && M != N)
1311     SETERRQ(1,0,"Square matrix only for in-place");
1312   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1313          PETSC_NULL,&B); CHKERRQ(ierr);
1314 
1315   /* copy over the A part */
1316   Aloc = (Mat_SeqAIJ*) a->A->data;
1317   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1318   row = a->rstart;
1319   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1320   for ( i=0; i<m; i++ ) {
1321     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1322     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1323   }
1324   aj = Aloc->j;
1325   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1326 
1327   /* copy over the B part */
1328   Aloc = (Mat_SeqAIJ*) a->B->data;
1329   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1330   row = a->rstart;
1331   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1332   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1333   for ( i=0; i<m; i++ ) {
1334     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1335     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1336   }
1337   PetscFree(ct);
1338   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1339   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1340   if (matout != PETSC_NULL) {
1341     *matout = B;
1342   } else {
1343     /* This isn't really an in-place transpose, but we free the data structures from a */
1344     PetscFree(a->rowners);
1345     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1346     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1347     if (a->colmap) PetscFree(a->colmap);
1348     if (a->garray) PetscFree(a->garray);
1349     if (a->lvec) VecDestroy(a->lvec);
1350     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1351     PetscFree(a);
1352     PetscMemcpy(A,B,sizeof(struct _Mat));
1353     PetscHeaderDestroy(B);
1354   }
1355   return 0;
1356 }
1357 
1358 #undef __FUNC__
1359 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1360 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1361 {
1362   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1363   Mat a = aij->A, b = aij->B;
1364   int ierr,s1,s2,s3;
1365 
1366   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1367   if (rr) {
1368     s3 = aij->n;
1369     VecGetLocalSize_Fast(rr,s1);
1370     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1371     /* Overlap communication with computation. */
1372     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1373   }
1374   if (ll) {
1375     VecGetLocalSize_Fast(ll,s1);
1376     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1377     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1378   }
1379   /* scale the diagonal block */
1380   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1381 
1382   if (rr) {
1383     /* Do a scatter end and then right scale the off-diagonal block */
1384     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1385     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1386   }
1387 
1388   return 0;
1389 }
1390 
1391 
1392 extern int MatPrintHelp_SeqAIJ(Mat);
1393 #undef __FUNC__
1394 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1395 int MatPrintHelp_MPIAIJ(Mat A)
1396 {
1397   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1398 
1399   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1400   else return 0;
1401 }
1402 
1403 #undef __FUNC__
1404 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1405 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1406 {
1407   *bs = 1;
1408   return 0;
1409 }
1410 #undef __FUNC__
1411 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1412 int MatSetUnfactored_MPIAIJ(Mat A)
1413 {
1414   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1415   int        ierr;
1416   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1417   return 0;
1418 }
1419 
1420 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1421 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1422 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1423 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1424 /* -------------------------------------------------------------------*/
1425 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1426        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1427        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1428        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1429        0,0,
1430        0,0,
1431        0,0,
1432        MatRelax_MPIAIJ,
1433        MatTranspose_MPIAIJ,
1434        MatGetInfo_MPIAIJ,0,
1435        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1436        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1437        0,
1438        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1439        0,0,0,0,
1440        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1441        0,0,
1442        0,0,MatConvertSameType_MPIAIJ,0,0,
1443        0,0,0,
1444        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1445        MatPrintHelp_MPIAIJ,
1446        MatScale_MPIAIJ,0,0,0,
1447        MatGetBlockSize_MPIAIJ,0,0,0,0,
1448        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1449 
1450 
1451 #undef __FUNC__
1452 #define __FUNC__ "MatCreateMPIAIJ"
1453 /*@C
1454    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1455    (the default parallel PETSc format).  For good matrix assembly performance
1456    the user should preallocate the matrix storage by setting the parameters
1457    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1458    performance can be increased by more than a factor of 50.
1459 
1460    Input Parameters:
1461 .  comm - MPI communicator
1462 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1463            This value should be the same as the local size used in creating the
1464            y vector for the matrix-vector product y = Ax.
1465 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1466            This value should be the same as the local size used in creating the
1467            x vector for the matrix-vector product y = Ax.
1468 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1469 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1470 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1471            (same for all local rows)
1472 .  d_nnz - array containing the number of nonzeros in the various rows of the
1473            diagonal portion of the local submatrix (possibly different for each row)
1474            or PETSC_NULL. You must leave room for the diagonal entry even if
1475            it is zero.
1476 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1477            submatrix (same for all local rows).
1478 .  o_nnz - array containing the number of nonzeros in the various rows of the
1479            off-diagonal portion of the local submatrix (possibly different for
1480            each row) or PETSC_NULL.
1481 
1482    Output Parameter:
1483 .  A - the matrix
1484 
1485    Notes:
1486    The AIJ format (also called the Yale sparse matrix format or
1487    compressed row storage) is fully compatible with standard Fortran 77
1488    storage.  That is, the stored row and column indices can begin at
1489    either one (as in Fortran) or zero.  See the users manual for details.
1490 
1491    The user MUST specify either the local or global matrix dimensions
1492    (possibly both).
1493 
1494    By default, this format uses inodes (identical nodes) when possible.
1495    We search for consecutive rows with the same nonzero structure, thereby
1496    reusing matrix information to achieve increased efficiency.
1497 
1498    Options Database Keys:
1499 $    -mat_aij_no_inode  - Do not use inodes
1500 $    -mat_aij_inode_limit <limit> - Set inode limit.
1501 $        (max limit=5)
1502 $    -mat_aij_oneindex - Internally use indexing starting at 1
1503 $        rather than 0.  Note: When calling MatSetValues(),
1504 $        the user still MUST index entries starting at 0!
1505 
1506    Storage Information:
1507    For a square global matrix we define each processor's diagonal portion
1508    to be its local rows and the corresponding columns (a square submatrix);
1509    each processor's off-diagonal portion encompasses the remainder of the
1510    local matrix (a rectangular submatrix).
1511 
1512    The user can specify preallocated storage for the diagonal part of
1513    the local submatrix with either d_nz or d_nnz (not both).  Set
1514    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1515    memory allocation.  Likewise, specify preallocated storage for the
1516    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1517 
1518    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1519    the figure below we depict these three local rows and all columns (0-11).
1520 
1521 $          0 1 2 3 4 5 6 7 8 9 10 11
1522 $         -------------------
1523 $  row 3  |  o o o d d d o o o o o o
1524 $  row 4  |  o o o d d d o o o o o o
1525 $  row 5  |  o o o d d d o o o o o o
1526 $         -------------------
1527 $
1528 
1529    Thus, any entries in the d locations are stored in the d (diagonal)
1530    submatrix, and any entries in the o locations are stored in the
1531    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1532    stored simply in the MATSEQAIJ format for compressed row storage.
1533 
1534    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1535    and o_nz should indicate the number of nonzeros per row in the o matrix.
1536    In general, for PDE problems in which most nonzeros are near the diagonal,
1537    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1538    or you will get TERRIBLE performance; see the users' manual chapter on
1539    matrices.
1540 
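
   If every location shown above held a nonzero, each of these three rows would
   have 3 entries in the d block and 9 in the o block, so d_nz=3 and o_nz=9
   would preallocate that processor's storage exactly.

   Example:
   A minimal sketch of typical usage (error checking with CHKERRQ() and the
   calls that actually set matrix entries are omitted; the per-row estimates
   5 and 2 below are illustrative values only, not recommendations):
$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A);
$      /* insert entries with MatSetValues(), using global row/column indices */
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);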
1541 .keywords: matrix, aij, compressed row, sparse, parallel
1542 
1543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1544 @*/
1545 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1546                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1547 {
1548   Mat          B;
1549   Mat_MPIAIJ   *b;
1550   int          ierr, i,sum[2],work[2],size;
1551 
1552   *A = 0;
1553   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1554   PLogObjectCreate(B);
1555   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1556   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1557   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1558   MPI_Comm_size(comm,&size);
1559   if (size == 1) {
1560     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1561     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1562     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1563     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1564     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1565     B->ops.solve             = MatSolve_MPIAIJ;
1566     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1567     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1568     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1569     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1570   }
1571   B->destroy    = MatDestroy_MPIAIJ;
1572   B->view       = MatView_MPIAIJ;
1573   B->factor     = 0;
1574   B->assembled  = PETSC_FALSE;
1575   B->mapping    = 0;
1576 
1577   B->insertmode = NOT_SET_VALUES;
1578   MPI_Comm_rank(comm,&b->rank);
1579   MPI_Comm_size(comm,&b->size);
1580 
1581   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1582     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1583 
1584   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1585     work[0] = m; work[1] = n;
1586     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1587     if (M == PETSC_DECIDE) M = sum[0];
1588     if (N == PETSC_DECIDE) N = sum[1];
1589   }
1590   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1591   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1592   b->m = m; B->m = m;
1593   b->n = n; B->n = n;
1594   b->N = N; B->N = N;
1595   b->M = M; B->M = M;
1596 
1597   /* build local table of row and column ownerships */
1598   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1599   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1600   b->cowners = b->rowners + b->size + 2;
1601   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1602   b->rowners[0] = 0;
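  /* running sum converts the gathered local row counts into ownership offsets:
     process p owns rows rowners[p] through rowners[p+1]-1 */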
1603   for ( i=2; i<=b->size; i++ ) {
1604     b->rowners[i] += b->rowners[i-1];
1605   }
1606   b->rstart = b->rowners[b->rank];
1607   b->rend   = b->rowners[b->rank+1];
1608   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1609   b->cowners[0] = 0;
1610   for ( i=2; i<=b->size; i++ ) {
1611     b->cowners[i] += b->cowners[i-1];
1612   }
1613   b->cstart = b->cowners[b->rank];
1614   b->cend   = b->cowners[b->rank+1];
1615 
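  /* local storage is two sequential AIJ matrices: A holds the m x n diagonal block,
     B holds the off-diagonal part; B is created with the full global column width N
     and its columns are compacted to those actually used during assembly */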
1616   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1617   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1618   PLogObjectParent(B,b->A);
1619   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1620   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1621   PLogObjectParent(B,b->B);
1622 
1623   /* build cache (stash) for entries destined for rows owned by other processors */
1624   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1625   b->donotstash  = 0;
1626   b->colmap      = 0;
1627   b->garray      = 0;
1628   b->roworiented = 1;
1629 
1630   /* stuff used for matrix vector multiply */
1631   b->lvec      = 0;
1632   b->Mvctx     = 0;
1633 
1634   /* stuff for MatGetRow() */
1635   b->rowindices   = 0;
1636   b->rowvalues    = 0;
1637   b->getrowactive = PETSC_FALSE;
1638 
1639   *A = B;
1640   return 0;
1641 }
1642 
1643 #undef __FUNC__
1644 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1645 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1646 {
1647   Mat        mat;
1648   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1649   int        ierr, len=0, flg;
1650 
1651   *newmat       = 0;
1652   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1653   PLogObjectCreate(mat);
1654   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1655   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1656   mat->destroy    = MatDestroy_MPIAIJ;
1657   mat->view       = MatView_MPIAIJ;
1658   mat->factor     = matin->factor;
1659   mat->assembled  = PETSC_TRUE;
1660 
1661   a->m = mat->m   = oldmat->m;
1662   a->n = mat->n   = oldmat->n;
1663   a->M = mat->M   = oldmat->M;
1664   a->N = mat->N   = oldmat->N;
1665 
1666   a->rstart       = oldmat->rstart;
1667   a->rend         = oldmat->rend;
1668   a->cstart       = oldmat->cstart;
1669   a->cend         = oldmat->cend;
1670   a->size         = oldmat->size;
1671   a->rank         = oldmat->rank;
1672   mat->insertmode = NOT_SET_VALUES;
1673   a->rowvalues    = 0;
1674   a->getrowactive = PETSC_FALSE;
1675 
1676   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1677   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1678   a->cowners = a->rowners + a->size + 2;
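  /* rowners and cowners live in one allocation, so the single copy below
     duplicates both ownership tables */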
1679   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1680   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1681   if (oldmat->colmap) {
1682     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1683     PLogObjectMemory(mat,(a->N)*sizeof(int));
1684     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1685   } else a->colmap = 0;
1686   if (oldmat->garray) {
1687     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1688     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1689     PLogObjectMemory(mat,len*sizeof(int));
1690     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1691   } else a->garray = 0;
1692 
1693   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1694   PLogObjectParent(mat,a->lvec);
1695   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1696   PLogObjectParent(mat,a->Mvctx);
1697   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1698   PLogObjectParent(mat,a->A);
1699   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1700   PLogObjectParent(mat,a->B);
1701   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1702   if (flg) {
1703     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1704   }
1705   *newmat = mat;
1706   return 0;
1707 }
1708 
1709 #include "sys.h"
1710 
1711 #undef __FUNC__
1712 #define __FUNC__ "MatLoad_MPIAIJ"
1713 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1714 {
1715   Mat          A;
1716   int          i, nz, ierr, j,rstart, rend, fd;
1717   Scalar       *vals,*svals;
1718   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1719   MPI_Status   status;
1720   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1721   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1722   int          tag = ((PetscObject)viewer)->tag;
1723 
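  /* process 0 reads the header, row lengths, column indices and numerical values
     from the binary file and ships each processor its piece; every processor then
     inserts its own rows with MatSetValues() and the matrix is assembled */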
1724   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1725   if (!rank) {
1726     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1727     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1728     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1729   }
1730 
1731   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1732   M = header[1]; N = header[2];
1733   /* determine ownership of all rows */
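  /* each process gets M/size rows; the first M % size processes take one extra row */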
1734   m = M/size + ((M % size) > rank);
1735   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1736   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1737   rowners[0] = 0;
1738   for ( i=2; i<=size; i++ ) {
1739     rowners[i] += rowners[i-1];
1740   }
1741   rstart = rowners[rank];
1742   rend   = rowners[rank+1];
1743 
1744   /* distribute row lengths to all processors */
1745   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1746   offlens = ourlens + (rend-rstart);
1747   if (!rank) {
1748     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1749     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1750     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1751     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1752     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1753     PetscFree(sndcounts);
1754   }
1755   else {
1756     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1757   }
1758 
1759   if (!rank) {
1760     /* calculate the number of nonzeros on each processor */
1761     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1762     PetscMemzero(procsnz,size*sizeof(int));
1763     for ( i=0; i<size; i++ ) {
1764       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1765         procsnz[i] += rowlengths[j];
1766       }
1767     }
1768     PetscFree(rowlengths);
1769 
1770     /* determine max buffer needed and allocate it */
1771     maxnz = 0;
1772     for ( i=0; i<size; i++ ) {
1773       maxnz = PetscMax(maxnz,procsnz[i]);
1774     }
1775     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1776 
1777     /* read in my part of the matrix column indices  */
1778     nz = procsnz[0];
1779     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1780     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1781 
1782     /* read in everyone else's column indices and ship them off */
1783     for ( i=1; i<size; i++ ) {
1784       nz = procsnz[i];
1785       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1786       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1787     }
1788     PetscFree(cols);
1789   }
1790   else {
1791     /* determine buffer space needed for message */
1792     nz = 0;
1793     for ( i=0; i<m; i++ ) {
1794       nz += ourlens[i];
1795     }
1796     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1797 
1798     /* receive message of column indices */
1799     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1800     MPI_Get_count(&status,MPI_INT,&maxnz);
1801     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1802   }
1803 
1804   /* loop over local rows, determining the number of off-diagonal entries in each */
1805   PetscMemzero(offlens,m*sizeof(int));
1806   jj = 0;
1807   for ( i=0; i<m; i++ ) {
1808     for ( j=0; j<ourlens[i]; j++ ) {
1809       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1810       jj++;
1811     }
1812   }
1813 
1814   /* create our matrix */
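  /* ourlens[] currently holds the total length of each local row; temporarily
     subtract the off-diagonal counts so the diagonal and off-diagonal blocks are
     preallocated separately, then restore the totals for the MatSetValues() loops below */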
1815   for ( i=0; i<m; i++ ) {
1816     ourlens[i] -= offlens[i];
1817   }
1818   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1819   A = *newmat;
1820   MatSetOption(A,MAT_COLUMNS_SORTED);
1821   for ( i=0; i<m; i++ ) {
1822     ourlens[i] += offlens[i];
1823   }
1824 
1825   if (!rank) {
1826     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1827 
1828     /* read in my part of the matrix numerical values  */
1829     nz = procsnz[0];
1830     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1831 
1832     /* insert into matrix */
1833     jj      = rstart;
1834     smycols = mycols;
1835     svals   = vals;
1836     for ( i=0; i<m; i++ ) {
1837       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1838       smycols += ourlens[i];
1839       svals   += ourlens[i];
1840       jj++;
1841     }
1842 
1843     /* read in the other processors' values and ship them out */
1844     for ( i=1; i<size; i++ ) {
1845       nz = procsnz[i];
1846       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1847       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1848     }
1849     PetscFree(procsnz);
1850   }
1851   else {
1852     /* receive numeric values */
1853     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1854 
1855     /* receive message of values */
1856     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1857     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1858     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1859 
1860     /* insert into matrix */
1861     jj      = rstart;
1862     smycols = mycols;
1863     svals   = vals;
1864     for ( i=0; i<m; i++ ) {
1865       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1866       smycols += ourlens[i];
1867       svals   += ourlens[i];
1868       jj++;
1869     }
1870   }
1871   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1872 
1873   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1874   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1875   return 0;
1876 }
1877