xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bd7f49f52f2557a78727fcb3db5d0e2e15f1ec40)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.214 1997/08/22 15:13:39 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global matrix length.
14 */
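/* For example, if garray[] = {4,9,17} (the global columns that actually occur in
   the off-diagonal block B), then after this routine colmap[4] = 1, colmap[9] = 2,
   colmap[17] = 3 and every other entry of colmap is 0; callers recover B's local
   column number as colmap[global column] - 1. */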
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #define CHUNKSIZE   15
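/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) inserts one value directly into
   the CSR arrays (ai,aj,aa) of the "diagonal" block aij->A.  The row is searched for
   the column (a short bisection followed by a linear scan); if the column is found
   the value is added (ADD_VALUES) or overwritten (INSERT_VALUES).  Otherwise a new
   nonzero is created: depending on a->nonew it is silently dropped (1) or flagged as
   an error (-1); if the row has no free slot its storage is enlarged by CHUNKSIZE
   entries with a fresh malloc and copy (nonew == -2 makes that reallocation an error).
*/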
33 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
34 { \
35  \
36     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
37     rmax = aimax[row]; nrow = ailen[row];  \
38     col1 = col - shift; \
39      \
40     low = 0; high = nrow; \
41     while (high-low > 5) { \
42       t = (low+high)/2; \
43       if (rp[t] > col) high = t; \
44       else             low  = t; \
45     } \
46       for ( _i=0; _i<nrow; _i++ ) { \
47         if (rp[_i] > col1) break; \
48         if (rp[_i] == col1) { \
49           if (addv == ADD_VALUES) ap[_i] += value;   \
50           else                  ap[_i] = value; \
51           goto a_noinsert; \
52         } \
53       }  \
54       if (nonew == 1) goto a_noinsert; \
55       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
56       if (nrow >= rmax) { \
57         /* there is no extra room in row, therefore enlarge */ \
58         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
59         Scalar *new_a; \
60  \
61         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
62  \
63         /* malloc new storage space */ \
64         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
65         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
66         new_j   = (int *) (new_a + new_nz); \
67         new_i   = new_j + new_nz; \
68  \
69         /* copy over old data into new slots */ \
70         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
71         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
72         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
73         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
74         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
75                                                            len*sizeof(int)); \
76         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
77         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
78                                                            len*sizeof(Scalar));  \
79         /* free up old matrix storage */ \
80  \
81         PetscFree(a->a);  \
82         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
83         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
84         a->singlemalloc = 1; \
85  \
86         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
87         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
88         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
89         a->maxnz += CHUNKSIZE; \
90         a->reallocs++; \
91       } \
92       N = nrow++ - 1; a->nz++; \
93       /* shift up all the later entries in this row */ \
94       for ( ii=N; ii>=_i; ii-- ) { \
95         rp[ii+1] = rp[ii]; \
96         ap[ii+1] = ap[ii]; \
97       } \
98       rp[_i] = col1;  \
99       ap[_i] = value;  \
100       a_noinsert: ; \
101       ailen[row] = nrow; \
102 }
103 
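/*
   MatSetValues_SeqAIJ_B_Private() is identical to the macro above except that it
   operates on the "off-diagonal" block aij->B (arrays bi, bj, ba, bimax, bilen).
*/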
104 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
105 { \
106  \
107     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
108     rmax = bimax[row]; nrow = bilen[row];  \
109     col1 = col - shift; \
110      \
111     low = 0; high = nrow; \
112     while (high-low > 5) { \
113       t = (low+high)/2; \
114       if (rp[t] > col) high = t; \
115       else             low  = t; \
116     } \
117        for ( _i=0; _i<nrow; _i++ ) { \
118         if (rp[_i] > col1) break; \
119         if (rp[_i] == col1) { \
120           if (addv == ADD_VALUES) ap[_i] += value;   \
121           else                  ap[_i] = value; \
122           goto b_noinsert; \
123         } \
124       }  \
125       if (nonew == 1) goto b_noinsert; \
126       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
127       if (nrow >= rmax) { \
128         /* there is no extra room in row, therefore enlarge */ \
129         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
130         Scalar *new_a; \
131  \
132         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
133  \
134         /* malloc new storage space */ \
135         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
136         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
137         new_j   = (int *) (new_a + new_nz); \
138         new_i   = new_j + new_nz; \
139  \
140         /* copy over old data into new slots */ \
141         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
142         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
143         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
144         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
145         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
146                                                            len*sizeof(int)); \
147         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
148         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
149                                                            len*sizeof(Scalar));  \
150         /* free up old matrix storage */ \
151  \
152         PetscFree(b->a);  \
153         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
154         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
155         b->singlemalloc = 1; \
156  \
157         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
158         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
159         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
160         b->maxnz += CHUNKSIZE; \
161         b->reallocs++; \
162       } \
163       N = nrow++ - 1; b->nz++; \
164       /* shift up all the later entries in this row */ \
165       for ( ii=N; ii>=_i; ii-- ) { \
166         rp[ii+1] = rp[ii]; \
167         ap[ii+1] = ap[ii]; \
168       } \
169       rp[_i] = col1;  \
170       ap[_i] = value;  \
171       b_noinsert: ; \
172       bilen[row] = nrow; \
173 }
174 
175 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
176 #undef __FUNC__
177 #define __FUNC__ "MatSetValues_MPIAIJ"
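/*
   MatSetValues_MPIAIJ - Each locally owned row (rstart <= im[i] < rend) is split by
   column: entries with cstart <= in[j] < cend go into the diagonal block aij->A, all
   others into the off-diagonal block aij->B.  Once the matrix has been assembled,
   colmap translates the global column number into B's local numbering (before the
   first assembly the global number is used directly); if a new off-diagonal column
   appears and new nonzeros are allowed, DisAssemble_MPIAIJ() converts the matrix
   back to unassembled form so the entry can be added.  Values destined for rows
   owned by other processors are stashed (unless donotstash is set) and communicated
   during MatAssemblyBegin/End.
*/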
178 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
179 {
180   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
181   Scalar     value;
182   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
183   int        cstart = aij->cstart, cend = aij->cend,row,col;
184   int        roworiented = aij->roworiented;
185 
186   /* Some variables required in the macros below */
187   Mat        A = aij->A;
188   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
189   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
190   Scalar     *aa = a->a;
191 
192   Mat        B = aij->B;
193   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
194   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
195   Scalar     *ba = b->a;
196 
197   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
198   int        nonew = a->nonew,shift = a->indexshift;
199   Scalar     *ap;
200 
201   for ( i=0; i<m; i++ ) {
202 #if defined(PETSC_BOPT_g)
203     if (im[i] < 0) SETERRQ(1,0,"Negative row");
204     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
205 #endif
206     if (im[i] >= rstart && im[i] < rend) {
207       row = im[i] - rstart;
208       for ( j=0; j<n; j++ ) {
209         if (in[j] >= cstart && in[j] < cend){
210           col = in[j] - cstart;
211           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
212           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
213           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215 #if defined(PETSC_BOPT_g)
216         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
217         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
218 #endif
219         else {
220           if (mat->was_assembled) {
221             if (!aij->colmap) {
222               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
223             }
224             col = aij->colmap[in[j]] - 1;
225             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
226               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
227               col =  in[j];
228               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
229               B = aij->B;
230               b = (Mat_SeqAIJ *) B->data;
231               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
232               ba = b->a;
233             }
234           }
235           else col = in[j];
236           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
237           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
238           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
239         }
240       }
241     }
242     else {
243       if (roworiented && !aij->donotstash) {
244         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
245       }
246       else {
247         if (!aij->donotstash) {
248           row = im[i];
249           for ( j=0; j<n; j++ ) {
250             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
251           }
252         }
253       }
254     }
255   }
256   return 0;
257 }
258 
259 #undef __FUNC__
260 #define __FUNC__ "MatGetValues_MPIAIJ"
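/*
   MatGetValues_MPIAIJ - Only locally owned rows may be retrieved.  Columns in the
   local diagonal range come from aij->A; all other columns are looked up in the
   off-diagonal block through colmap/garray, and entries that are not stored there
   are returned as 0.0.
*/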
261 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
262 {
263   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
264   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
265   int        cstart = aij->cstart, cend = aij->cend,row,col;
266 
267   for ( i=0; i<m; i++ ) {
268     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
269     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
270     if (idxm[i] >= rstart && idxm[i] < rend) {
271       row = idxm[i] - rstart;
272       for ( j=0; j<n; j++ ) {
273         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
274         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
275         if (idxn[j] >= cstart && idxn[j] < cend){
276           col = idxn[j] - cstart;
277           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
278         }
279         else {
280           if (!aij->colmap) {
281             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
282           }
283           col = aij->colmap[idxn[j]] - 1;
284           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
285           else {
286             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
287           }
288         }
289       }
290     }
291     else {
292       SETERRQ(1,0,"Only local values currently supported");
293     }
294   }
295   return 0;
296 }
297 
298 #undef __FUNC__
299 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
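/*
   MatAssemblyBegin_MPIAIJ - Starts communication of the stashed (off-processor)
   values.  An MPI_Allreduce() first checks that all processors used the same insert
   mode; each processor then counts how many stashed entries go to each owner, learns
   (via two more Allreduces) how many messages it will receive and their maximum
   length, posts the receives, packs the stash as (row,col,value) triples of Scalars
   and starts the sends.  The communication is completed in MatAssemblyEnd_MPIAIJ().
*/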
300 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
303   MPI_Comm    comm = mat->comm;
304   int         size = aij->size, *owners = aij->rowners;
305   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
306   MPI_Request *send_waits,*recv_waits;
307   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
308   InsertMode  addv;
309   Scalar      *rvalues,*svalues;
310 
311   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
312   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
313   if (addv == (ADD_VALUES|INSERT_VALUES)) {
314     SETERRQ(1,0,"Some processors inserted others added");
315   }
316   mat->insertmode = addv; /* in case this processor had no cache */
317 
318   /*  first count number of contributors to each processor */
319   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
320   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
321   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
322   for ( i=0; i<aij->stash.n; i++ ) {
323     idx = aij->stash.idx[i];
324     for ( j=0; j<size; j++ ) {
325       if (idx >= owners[j] && idx < owners[j+1]) {
326         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
327       }
328     }
329   }
330   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
331 
332   /* inform other processors of number of messages and max length*/
333   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
334   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
335   nreceives = work[rank];
336   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
337   nmax = work[rank];
338   PetscFree(work);
339 
340   /* post receives:
341        1) each message consists of ordered triples
342      (global row index, global column index, value); the indices are
343      stored as Scalars (i.e. doubles) to simplify the message passing.
344        2) since we don't know how long each individual message is we
345      allocate the largest needed buffer for each receive. Potentially
346      this is a lot of wasted space.
347 
348 
349        This could be done better.
350   */
351   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
352   CHKPTRQ(rvalues);
353   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
354   CHKPTRQ(recv_waits);
355   for ( i=0; i<nreceives; i++ ) {
356     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
357               comm,recv_waits+i);
358   }
359 
360   /* do sends:
361       1) starts[i] gives the starting index in svalues for stuff going to
362          the ith processor
363   */
364   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
365   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
366   CHKPTRQ(send_waits);
367   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
368   starts[0] = 0;
369   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
370   for ( i=0; i<aij->stash.n; i++ ) {
371     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
372     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
373     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
374   }
375   PetscFree(owner);
376   starts[0] = 0;
377   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   count = 0;
379   for ( i=0; i<size; i++ ) {
380     if (procs[i]) {
381       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
382                 comm,send_waits+count++);
383     }
384   }
385   PetscFree(starts); PetscFree(nprocs);
386 
387   /* Free cache space */
388   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
389   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
390 
391   aij->svalues    = svalues;    aij->rvalues    = rvalues;
392   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
393   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
394   aij->rmax       = nmax;
395 
396   return 0;
397 }
398 extern int MatSetUpMultiply_MPIAIJ(Mat);
399 
400 #undef __FUNC__
401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
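/*
   MatAssemblyEnd_MPIAIJ - Completes the communication started in
   MatAssemblyBegin_MPIAIJ(): the received (row,col,value) triples are inserted into
   aij->A or aij->B, the send/receive buffers are freed, and the two sequential
   blocks are assembled.  If any processor had to disassemble, all do, so the matrix
   can be reassembled consistently; on the first final assembly
   MatSetUpMultiply_MPIAIJ() builds the data structures (garray, lvec, Mvctx) needed
   by MatMult().
*/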
402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
403 {
404   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
405   MPI_Status  *send_status,recv_status;
406   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
407   int         row,col,other_disassembled;
408   Scalar      *values,val;
409   InsertMode  addv = mat->insertmode;
410 
411   /*  wait on receives */
412   while (count) {
413     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
414     /* unpack receives into our local space */
415     values = aij->rvalues + 3*imdex*aij->rmax;
416     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
417     n = n/3;
418     for ( i=0; i<n; i++ ) {
419       row = (int) PetscReal(values[3*i]) - aij->rstart;
420       col = (int) PetscReal(values[3*i+1]);
421       val = values[3*i+2];
422       if (col >= aij->cstart && col < aij->cend) {
423         col -= aij->cstart;
424         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
425       }
426       else {
427         if (mat->was_assembled || mat->assembled) {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
430           }
431           col = aij->colmap[col] - 1;
432           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
433             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
434             col = (int) PetscReal(values[3*i+1]);
435           }
436         }
437         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
438       }
439     }
440     count--;
441   }
442   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
443 
444   /* wait on sends */
445   if (aij->nsends) {
446     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
447     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
448     PetscFree(send_status);
449   }
450   PetscFree(aij->send_waits); PetscFree(aij->svalues);
451 
452   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
454 
455   /* determine if any processor has disassembled, if so we must
456      also disassemble ourselves, in order that we may reassemble. */
457   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
458   if (mat->was_assembled && !other_disassembled) {
459     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460   }
461 
462   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
463     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
464   }
465   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
467 
468   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
469   return 0;
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatZeroEntries_MPIAIJ"
474 int MatZeroEntries_MPIAIJ(Mat A)
475 {
476   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
479   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
480   return 0;
481 }
482 
483 /* the code does not handle the diagonal entries correctly unless the
484    matrix is square and the column and row ownerships are identical.
485    This is a BUG. The only way to fix it seems to be to access
486    aij->A and aij->B directly and not through the MatZeroRows()
487    routine.
488 */
489 #undef __FUNC__
490 #define __FUNC__ "MatZeroRows_MPIAIJ"
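/*
   MatZeroRows_MPIAIJ - A collective operation: each processor determines the owner
   of every row index in is, ships the indices to their owners, gathers the rows it
   owns into a local index set, and then zeros those rows in both local blocks.  The
   optional diagonal value is applied only through the diagonal block l->A, which is
   why the restriction described above applies.
*/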
491 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
492 {
493   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
494   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
495   int            *procs,*nprocs,j,found,idx,nsends,*work;
496   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
497   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
498   int            *lens,imdex,*lrows,*values;
499   MPI_Comm       comm = A->comm;
500   MPI_Request    *send_waits,*recv_waits;
501   MPI_Status     recv_status,*send_status;
502   IS             istmp;
503 
504   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
505   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
506 
507   /*  first count number of contributors to each processor */
508   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
509   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
510   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
511   for ( i=0; i<N; i++ ) {
512     idx = rows[i];
513     found = 0;
514     for ( j=0; j<size; j++ ) {
515       if (idx >= owners[j] && idx < owners[j+1]) {
516         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
517       }
518     }
519     if (!found) SETERRQ(1,0,"Index out of range");
520   }
521   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
522 
523   /* inform other processors of number of messages and max length*/
524   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
525   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
526   nrecvs = work[rank];
527   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
528   nmax = work[rank];
529   PetscFree(work);
530 
531   /* post receives:   */
532   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
533   CHKPTRQ(rvalues);
534   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
535   CHKPTRQ(recv_waits);
536   for ( i=0; i<nrecvs; i++ ) {
537     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
538   }
539 
540   /* do sends:
541       1) starts[i] gives the starting index in svalues for stuff going to
542          the ith processor
543   */
544   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
545   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
546   CHKPTRQ(send_waits);
547   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
548   starts[0] = 0;
549   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
550   for ( i=0; i<N; i++ ) {
551     svalues[starts[owner[i]]++] = rows[i];
552   }
553   ISRestoreIndices(is,&rows);
554 
555   starts[0] = 0;
556   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
557   count = 0;
558   for ( i=0; i<size; i++ ) {
559     if (procs[i]) {
560       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
561     }
562   }
563   PetscFree(starts);
564 
565   base = owners[rank];
566 
567   /*  wait on receives */
568   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
569   source = lens + nrecvs;
570   count  = nrecvs; slen = 0;
571   while (count) {
572     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
573     /* unpack receives into our local space */
574     MPI_Get_count(&recv_status,MPI_INT,&n);
575     source[imdex]  = recv_status.MPI_SOURCE;
576     lens[imdex]  = n;
577     slen += n;
578     count--;
579   }
580   PetscFree(recv_waits);
581 
582   /* move the data into the send scatter */
583   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
584   count = 0;
585   for ( i=0; i<nrecvs; i++ ) {
586     values = rvalues + i*nmax;
587     for ( j=0; j<lens[i]; j++ ) {
588       lrows[count++] = values[j] - base;
589     }
590   }
591   PetscFree(rvalues); PetscFree(lens);
592   PetscFree(owner); PetscFree(nprocs);
593 
594   /* actually zap the local rows */
595   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
596   PLogObjectParent(A,istmp);
597   PetscFree(lrows);
598   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
599   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
600   ierr = ISDestroy(istmp); CHKERRQ(ierr);
601 
602   /* wait on sends */
603   if (nsends) {
604     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
605     CHKPTRQ(send_status);
606     MPI_Waitall(nsends,send_waits,send_status);
607     PetscFree(send_status);
608   }
609   PetscFree(send_waits); PetscFree(svalues);
610 
611   return 0;
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatMult_MPIAIJ"
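/*
   MatMult_MPIAIJ - Computes yy = A*xx for the local rows as
       yy = (diagonal block)*xx + (off-diagonal block)*lvec,
   where lvec holds the off-processor entries of xx gathered by the scatter Mvctx.
   The scatter is started before the diagonal-block multiply and completed just
   before the off-diagonal multiply-add, so communication overlaps computation.
*/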
616 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
617 {
618   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
619   int        ierr,nt;
620 
621   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
622   if (nt != a->n) {
623     SETERRQ(1,0,"Incompatible partition of A and xx");
624   }
625   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
626   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
627   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
628   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
629   return 0;
630 }
631 
632 #undef __FUNC__
633 #define __FUNC__ "MatMultAdd_MPIAIJ"
634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
635 {
636   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
637   int        ierr;
638   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
639   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
640   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
641   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
642   return 0;
643 }
644 
645 #undef __FUNC__
646 #define __FUNC__ "MatMultTrans_MPIAIJ"
647 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
648 {
649   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
650   int        ierr;
651 
652   /* do nondiagonal part */
653   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
654   /* send it on its way */
655   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
656   /* do local part */
657   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
658   /* receive remote parts: note this assumes the values are not actually */
659   /* inserted in yy until the next line, which is true for my implementation*/
660   /* but may not always be true. */
661   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
662   return 0;
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
667 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   /* do nondiagonal part */
673   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
674   /* send it on its way */
675   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
676   /* do local part */
677   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
678   /* receive remote parts: note this assumes the values are not actually */
679   /* inserted in zz until the next line, which is true for my implementation*/
680   /* but may not always be true. */
681   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682   return 0;
683 }
684 
685 /*
686   This only works correctly for square matrices where the subblock A->A is the
687    diagonal block
688 */
689 #undef __FUNC__
690 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
691 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
692 {
693   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
694   if (a->M != a->N)
695     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
696   if (a->rstart != a->cstart || a->rend != a->cend) {
697     SETERRQ(1,0,"row partition must equal col partition");  }
698   return MatGetDiagonal(a->A,v);
699 }
700 
701 #undef __FUNC__
702 #define __FUNC__ "MatScale_MPIAIJ"
703 int MatScale_MPIAIJ(Scalar *aa,Mat A)
704 {
705   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
706   int        ierr;
707   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
708   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
709   return 0;
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatDestroy_MPIAIJ"
714 int MatDestroy_MPIAIJ(PetscObject obj)
715 {
716   Mat        mat = (Mat) obj;
717   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
718   int        ierr;
719 
720 #if defined(PETSC_LOG)
721   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
722 #endif
723   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
724   PetscFree(aij->rowners);
725   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
726   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
727   if (aij->colmap) PetscFree(aij->colmap);
728   if (aij->garray) PetscFree(aij->garray);
729   if (aij->lvec)   VecDestroy(aij->lvec);
730   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
731   if (aij->rowvalues) PetscFree(aij->rowvalues);
732   PetscFree(aij);
733   PLogObjectDestroy(mat);
734   PetscHeaderDestroy(mat);
735   return 0;
736 }
737 
738 #undef __FUNC__
739 #define __FUNC__ "MatView_MPIAIJ_Binary"
740 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
741 {
742   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
743   int         ierr;
744 
745   if (aij->size == 1) {
746     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
747   }
748   else SETERRQ(1,0,"Only uniprocessor output supported");
749   return 0;
750 }
751 
752 #undef __FUNC__
753 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
754 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
757   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
758   int         ierr, format,shift = C->indexshift,rank;
759   FILE        *fd;
760   ViewerType  vtype;
761 
762   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
763   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
764     ierr = ViewerGetFormat(viewer,&format);
765     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
766       MatInfo info;
767       int     flg;
768       MPI_Comm_rank(mat->comm,&rank);
769       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
770       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
771       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
772       PetscSequentialPhaseBegin(mat->comm,1);
773       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
774          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
775       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
776          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
777       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
778       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
779       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
780       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
781       fflush(fd);
782       PetscSequentialPhaseEnd(mat->comm,1);
783       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
784       return 0;
785     }
786     else if (format == VIEWER_FORMAT_ASCII_INFO) {
787       return 0;
788     }
789   }
790 
791   if (vtype == DRAW_VIEWER) {
792     Draw       draw;
793     PetscTruth isnull;
794     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
795     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
796   }
797 
798   if (vtype == ASCII_FILE_VIEWER) {
799     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
800     PetscSequentialPhaseBegin(mat->comm,1);
801     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
802            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
803            aij->cend);
804     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
805     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
806     fflush(fd);
807     PetscSequentialPhaseEnd(mat->comm,1);
808   }
809   else {
810     int size = aij->size;
811     rank = aij->rank;
812     if (size == 1) {
813       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
814     }
815     else {
816       /* assemble the entire matrix onto first processor. */
817       Mat         A;
818       Mat_SeqAIJ *Aloc;
819       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
820       Scalar      *a;
821 
822       if (!rank) {
823         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
824                CHKERRQ(ierr);
825       }
826       else {
827         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
828                CHKERRQ(ierr);
829       }
830       PLogObjectParent(mat,A);
831 
832       /* copy over the A part */
833       Aloc = (Mat_SeqAIJ*) aij->A->data;
834       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
835       row = aij->rstart;
836       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
837       for ( i=0; i<m; i++ ) {
838         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
839         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
840       }
841       aj = Aloc->j;
842       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
843 
844       /* copy over the B part */
845       Aloc = (Mat_SeqAIJ*) aij->B->data;
846       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
847       row = aij->rstart;
848       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
849       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
850       for ( i=0; i<m; i++ ) {
851         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
852         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
853       }
854       PetscFree(ct);
855       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
856       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
857       if (!rank) {
858         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
859       }
860       ierr = MatDestroy(A); CHKERRQ(ierr);
861     }
862   }
863   return 0;
864 }
865 
866 #undef __FUNC__
867 #define __FUNC__ "MatView_MPIAIJ"
868 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
869 {
870   Mat         mat = (Mat) obj;
871   int         ierr;
872   ViewerType  vtype;
873 
874   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
875   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
876       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
877     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
878   }
879   else if (vtype == BINARY_FILE_VIEWER) {
880     return MatView_MPIAIJ_Binary(mat,viewer);
881   }
882   return 0;
883 }
884 
885 /*
886     This has to provide several versions.
887 
888      2) a) use only local smoothing, updating the outer values only once.
889         b) local smoothing, updating the outer values each inner iteration.
890      3) coloring, updating the outer values between colors.
891 */
892 #undef __FUNC__
893 #define __FUNC__ "MatRelax_MPIAIJ"
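/*
   MatRelax_MPIAIJ - Performs processor-local SOR sweeps: the off-processor values
   of xx are gathered into lvec (once per call) and then held fixed while forward,
   backward or symmetric Gauss-Seidel/SOR sweeps are applied to the local rows using
   both the diagonal block A and the off-diagonal block B.  With a zero initial
   guess the sweep reduces to the relaxation of the diagonal block alone.
*/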
894 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
895                            double fshift,int its,Vec xx)
896 {
897   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
898   Mat        AA = mat->A, BB = mat->B;
899   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
900   Scalar     *b,*x,*xs,*ls,d,*v,sum;
901   int        ierr,*idx, *diag;
902   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
903 
904   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
905   xs = x + shift; /* shift by one for index start of 1 */
906   ls = ls + shift;
907   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
908   diag = A->diag;
909   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
910     if (flag & SOR_ZERO_INITIAL_GUESS) {
911       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
912     }
913     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
914     CHKERRQ(ierr);
915     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
916     CHKERRQ(ierr);
917     while (its--) {
918       /* go down through the rows */
919       for ( i=0; i<m; i++ ) {
920         n    = A->i[i+1] - A->i[i];
921         idx  = A->j + A->i[i] + shift;
922         v    = A->a + A->i[i] + shift;
923         sum  = b[i];
924         SPARSEDENSEMDOT(sum,xs,v,idx,n);
925         d    = fshift + A->a[diag[i]+shift];
926         n    = B->i[i+1] - B->i[i];
927         idx  = B->j + B->i[i] + shift;
928         v    = B->a + B->i[i] + shift;
929         SPARSEDENSEMDOT(sum,ls,v,idx,n);
930         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
931       }
932       /* come up through the rows */
933       for ( i=m-1; i>-1; i-- ) {
934         n    = A->i[i+1] - A->i[i];
935         idx  = A->j + A->i[i] + shift;
936         v    = A->a + A->i[i] + shift;
937         sum  = b[i];
938         SPARSEDENSEMDOT(sum,xs,v,idx,n);
939         d    = fshift + A->a[diag[i]+shift];
940         n    = B->i[i+1] - B->i[i];
941         idx  = B->j + B->i[i] + shift;
942         v    = B->a + B->i[i] + shift;
943         SPARSEDENSEMDOT(sum,ls,v,idx,n);
944         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
945       }
946     }
947   }
948   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
949     if (flag & SOR_ZERO_INITIAL_GUESS) {
950       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
951     }
952     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
953     CHKERRQ(ierr);
954     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
955     CHKERRQ(ierr);
956     while (its--) {
957       for ( i=0; i<m; i++ ) {
958         n    = A->i[i+1] - A->i[i];
959         idx  = A->j + A->i[i] + shift;
960         v    = A->a + A->i[i] + shift;
961         sum  = b[i];
962         SPARSEDENSEMDOT(sum,xs,v,idx,n);
963         d    = fshift + A->a[diag[i]+shift];
964         n    = B->i[i+1] - B->i[i];
965         idx  = B->j + B->i[i] + shift;
966         v    = B->a + B->i[i] + shift;
967         SPARSEDENSEMDOT(sum,ls,v,idx,n);
968         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
969       }
970     }
971   }
972   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
973     if (flag & SOR_ZERO_INITIAL_GUESS) {
974       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
975     }
976     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
977                             mat->Mvctx); CHKERRQ(ierr);
978     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
979                             mat->Mvctx); CHKERRQ(ierr);
980     while (its--) {
981       for ( i=m-1; i>-1; i-- ) {
982         n    = A->i[i+1] - A->i[i];
983         idx  = A->j + A->i[i] + shift;
984         v    = A->a + A->i[i] + shift;
985         sum  = b[i];
986         SPARSEDENSEMDOT(sum,xs,v,idx,n);
987         d    = fshift + A->a[diag[i]+shift];
988         n    = B->i[i+1] - B->i[i];
989         idx  = B->j + B->i[i] + shift;
990         v    = B->a + B->i[i] + shift;
991         SPARSEDENSEMDOT(sum,ls,v,idx,n);
992         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
993       }
994     }
995   }
996   else {
997     SETERRQ(1,0,"Option not supported");
998   }
999   return 0;
1000 }
1001 
1002 #undef __FUNC__
1003 #define __FUNC__ "MatGetInfo_MPIAIJ"
1004 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1005 {
1006   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1007   Mat        A = mat->A, B = mat->B;
1008   int        ierr;
1009   double     isend[5], irecv[5];
1010 
1011   info->rows_global    = (double)mat->M;
1012   info->columns_global = (double)mat->N;
1013   info->rows_local     = (double)mat->m;
1014   info->columns_local  = (double)mat->N;
1015   info->block_size     = 1.0;
1016   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1017   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1018   isend[3] = info->memory;  isend[4] = info->mallocs;
1019   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1020   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1021   isend[3] += info->memory;  isend[4] += info->mallocs;
1022   if (flag == MAT_LOCAL) {
1023     info->nz_used      = isend[0];
1024     info->nz_allocated = isend[1];
1025     info->nz_unneeded  = isend[2];
1026     info->memory       = isend[3];
1027     info->mallocs      = isend[4];
1028   } else if (flag == MAT_GLOBAL_MAX) {
1029     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1030     info->nz_used      = irecv[0];
1031     info->nz_allocated = irecv[1];
1032     info->nz_unneeded  = irecv[2];
1033     info->memory       = irecv[3];
1034     info->mallocs      = irecv[4];
1035   } else if (flag == MAT_GLOBAL_SUM) {
1036     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1037     info->nz_used      = irecv[0];
1038     info->nz_allocated = irecv[1];
1039     info->nz_unneeded  = irecv[2];
1040     info->memory       = irecv[3];
1041     info->mallocs      = irecv[4];
1042   }
1043   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1044   info->fill_ratio_needed = 0;
1045   info->factor_mallocs    = 0;
1046 
1047   return 0;
1048 }
1049 
1050 #undef __FUNC__
1051 #define __FUNC__ "MatSetOption_MPIAIJ"
1052 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1055 
1056   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1057       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1058       op == MAT_COLUMNS_UNSORTED ||
1059       op == MAT_COLUMNS_SORTED ||
1060       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1061       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1062         MatSetOption(a->A,op);
1063         MatSetOption(a->B,op);
1064   } else if (op == MAT_ROW_ORIENTED) {
1065     a->roworiented = 1;
1066     MatSetOption(a->A,op);
1067     MatSetOption(a->B,op);
1068   } else if (op == MAT_ROWS_SORTED ||
1069              op == MAT_ROWS_UNSORTED ||
1070              op == MAT_SYMMETRIC ||
1071              op == MAT_STRUCTURALLY_SYMMETRIC ||
1072              op == MAT_YES_NEW_DIAGONALS)
1073     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1074   else if (op == MAT_COLUMN_ORIENTED) {
1075     a->roworiented = 0;
1076     MatSetOption(a->A,op);
1077     MatSetOption(a->B,op);
1078   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1079     a->donotstash = 1;
1080   } else if (op == MAT_NO_NEW_DIAGONALS)
1081     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1082   else
1083     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1084   return 0;
1085 }
1086 
1087 #undef __FUNC__
1088 #define __FUNC__ "MatGetSize_MPIAIJ"
1089 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1090 {
1091   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1092   *m = mat->M; *n = mat->N;
1093   return 0;
1094 }
1095 
1096 #undef __FUNC__
1097 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1098 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1099 {
1100   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1101   *m = mat->m; *n = mat->N;
1102   return 0;
1103 }
1104 
1105 #undef __FUNC__
1106 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1107 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1108 {
1109   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1110   *m = mat->rstart; *n = mat->rend;
1111   return 0;
1112 }
1113 
1114 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1115 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1116 
1117 #undef __FUNC__
1118 #define __FUNC__ "MatGetRow_MPIAIJ"
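/*
   MatGetRow_MPIAIJ - Returns a complete (globally numbered) row of the parallel
   matrix by merging the corresponding rows of the diagonal block A and the
   off-diagonal block B; B's local column indices are mapped back to global
   numbering through garray and the merged row is returned in increasing column
   order.  Only locally owned rows may be requested.
*/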
1119 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1120 {
1121   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1122   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1123   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1124   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1125   int        *cmap, *idx_p;
1126 
1127   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1128   mat->getrowactive = PETSC_TRUE;
1129 
1130   if (!mat->rowvalues && (idx || v)) {
1131     /*
1132         allocate enough space to hold information from the longest row.
1133     */
1134     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1135     int     max = 1,m = mat->m,tmp;
1136     for ( i=0; i<m; i++ ) {
1137       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1138       if (max < tmp) { max = tmp; }
1139     }
1140     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1141     CHKPTRQ(mat->rowvalues);
1142     mat->rowindices = (int *) (mat->rowvalues + max);
1143   }
1144 
1145   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1146   lrow = row - rstart;
1147 
1148   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1149   if (!v)   {pvA = 0; pvB = 0;}
1150   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1151   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1152   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1153   nztot = nzA + nzB;
1154 
1155   cmap  = mat->garray;
1156   if (v  || idx) {
1157     if (nztot) {
1158       /* Sort by increasing column numbers, assuming A and B already sorted */
1159       int imark = -1;
1160       if (v) {
1161         *v = v_p = mat->rowvalues;
1162         for ( i=0; i<nzB; i++ ) {
1163           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1164           else break;
1165         }
1166         imark = i;
1167         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1168         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1169       }
1170       if (idx) {
1171         *idx = idx_p = mat->rowindices;
1172         if (imark > -1) {
1173           for ( i=0; i<imark; i++ ) {
1174             idx_p[i] = cmap[cworkB[i]];
1175           }
1176         } else {
1177           for ( i=0; i<nzB; i++ ) {
1178             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1179             else break;
1180           }
1181           imark = i;
1182         }
1183         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1184         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1185       }
1186     }
1187     else {
1188       if (idx) *idx = 0;
1189       if (v)   *v   = 0;
1190     }
1191   }
1192   *nz = nztot;
1193   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1194   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1195   return 0;
1196 }
1197 
1198 #undef __FUNC__
1199 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1200 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1201 {
1202   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1203   if (aij->getrowactive == PETSC_FALSE) {
1204     SETERRQ(1,0,"MatGetRow not called");
1205   }
1206   aij->getrowactive = PETSC_FALSE;
1207   return 0;
1208 }
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatNorm_MPIAIJ"
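/*
   MatNorm_MPIAIJ - The Frobenius norm is computed from the sum of squares of the
   local entries followed by an MPI_Allreduce() and a square root; the 1-norm (max
   column sum) accumulates per-global-column absolute sums in a length-N array and
   reduces it; the infinity norm (max row sum) reduces the local row sums with
   MPI_MAX.  The 2-norm is not supported.
*/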
1212 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1213 {
1214   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1215   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1216   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1217   double     sum = 0.0;
1218   Scalar     *v;
1219 
1220   if (aij->size == 1) {
1221     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1222   } else {
1223     if (type == NORM_FROBENIUS) {
1224       v = amat->a;
1225       for (i=0; i<amat->nz; i++ ) {
1226 #if defined(PETSC_COMPLEX)
1227         sum += real(conj(*v)*(*v)); v++;
1228 #else
1229         sum += (*v)*(*v); v++;
1230 #endif
1231       }
1232       v = bmat->a;
1233       for (i=0; i<bmat->nz; i++ ) {
1234 #if defined(PETSC_COMPLEX)
1235         sum += real(conj(*v)*(*v)); v++;
1236 #else
1237         sum += (*v)*(*v); v++;
1238 #endif
1239       }
1240       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1241       *norm = sqrt(*norm);
1242     }
1243     else if (type == NORM_1) { /* max column norm */
1244       double *tmp, *tmp2;
1245       int    *jj, *garray = aij->garray;
1246       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1247       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1248       PetscMemzero(tmp,aij->N*sizeof(double));
1249       *norm = 0.0;
1250       v = amat->a; jj = amat->j;
1251       for ( j=0; j<amat->nz; j++ ) {
1252         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1253       }
1254       v = bmat->a; jj = bmat->j;
1255       for ( j=0; j<bmat->nz; j++ ) {
1256         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1257       }
1258       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1259       for ( j=0; j<aij->N; j++ ) {
1260         if (tmp2[j] > *norm) *norm = tmp2[j];
1261       }
1262       PetscFree(tmp); PetscFree(tmp2);
1263     }
1264     else if (type == NORM_INFINITY) { /* max row norm */
1265       double ntemp = 0.0;
1266       for ( j=0; j<amat->m; j++ ) {
1267         v = amat->a + amat->i[j] + shift;
1268         sum = 0.0;
1269         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1270           sum += PetscAbsScalar(*v); v++;
1271         }
1272         v = bmat->a + bmat->i[j] + shift;
1273         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1274           sum += PetscAbsScalar(*v); v++;
1275         }
1276         if (sum > ntemp) ntemp = sum;
1277       }
1278       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1279     }
1280     else {
1281       SETERRQ(1,0,"No support for two norm");
1282     }
1283   }
1284   return 0;
1285 }
1286 
1287 #undef __FUNC__
1288 #define __FUNC__ "MatTranspose_MPIAIJ"
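/*
   MatTranspose_MPIAIJ - Builds the transpose explicitly: each local row of A and of
   B (with B's columns mapped to global numbering through garray) is inserted as a
   column of a new MPIAIJ matrix, which is then assembled.  When matout is
   PETSC_NULL the new matrix simply replaces the old one's data structures; it is
   not a true in-place transpose.
*/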
1289 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1290 {
1291   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1292   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1293   int        ierr,shift = Aloc->indexshift;
1294   Mat        B;
1295   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1296   Scalar     *array;
1297 
1298   if (matout == PETSC_NULL && M != N) {
1299     SETERRQ(1,0,"Square matrix only for in-place");
1300   }
1301 
1302   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1303 
1304   /* copy over the A part */
1305   Aloc = (Mat_SeqAIJ*) a->A->data;
1306   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1307   row = a->rstart;
1308   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1309   for ( i=0; i<m; i++ ) {
1310     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1311     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1312   }
1313   aj = Aloc->j;
1314   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1315 
1316   /* copy over the B part */
1317   Aloc = (Mat_SeqAIJ*) a->B->data;
1318   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1319   row = a->rstart;
1320   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1321   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1322   for ( i=0; i<m; i++ ) {
1323     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1324     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1325   }
1326   PetscFree(ct);
1327   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1328   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1329   if (matout != PETSC_NULL) {
1330     *matout = B;
1331   } else {
1332     /* This isn't really an in-place transpose, but we do free the data structures of a */
1333     PetscFree(a->rowners);
1334     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1335     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1336     if (a->colmap) PetscFree(a->colmap);
1337     if (a->garray) PetscFree(a->garray);
1338     if (a->lvec) VecDestroy(a->lvec);
1339     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1340     PetscFree(a);
1341     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1342     PetscHeaderDestroy(B);
1343   }
1344   return 0;
1345 }
1346 
1347 #undef __FUNC__
1348 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
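/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr).  The left scaling
   is applied directly to both local blocks; for the right scaling the off-processor
   entries of rr are gathered into lvec (the scatter is overlapped with scaling the
   diagonal block) and then applied to the off-diagonal block.
*/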
1349 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1350 {
1351   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1352   Mat a = aij->A, b = aij->B;
1353   int ierr,s1,s2,s3;
1354 
1355   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1356   if (rr) {
1357     s3 = aij->n;
1358     VecGetLocalSize_Fast(rr,s1);
1359     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1360     /* Overlap communication with computation. */
1361     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1362   }
1363   if (ll) {
1364     VecGetLocalSize_Fast(ll,s1);
1365     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1366     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1367   }
1368   /* scale  the diagonal block */
1369   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1370 
1371   if (rr) {
1372     /* Do a scatter end and then right scale the off-diagonal block */
1373     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1374     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1375   }
1376 
1377   return 0;
1378 }
1379 
1380 
1381 extern int MatPrintHelp_SeqAIJ(Mat);
1382 #undef __FUNC__
1383 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1384 int MatPrintHelp_MPIAIJ(Mat A)
1385 {
1386   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1387 
1388   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1389   else return 0;
1390 }
1391 
1392 #undef __FUNC__
1393 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1394 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1395 {
1396   *bs = 1;
1397   return 0;
1398 }
1399 #undef __FUNC__
1400 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1401 int MatSetUnfactored_MPIAIJ(Mat A)
1402 {
1403   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1404   int        ierr;
1405   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1406   return 0;
1407 }
1408 
1409 #undef __FUNC__
1410 #define __FUNC__ "MatEqual_MPIAIJ"
1411 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1412 {
1413   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1414   Mat        a, b, c, d;
1415   PetscTruth flg;
1416   int        ierr;
1417 
1418   if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
1419   a = matA->A; b = matA->B;
1420   c = matB->A; d = matB->B;
1421 
1422   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1423   if (flg == PETSC_TRUE) {
1424     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1425   }
1426   MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);
1427   return(0);
1428 }
1429 
1430 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1431 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1432 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1433 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1434 /* -------------------------------------------------------------------*/
1435 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1436        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1437        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1438        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1439        0,0,
1440        0,0,
1441        0,0,
1442        MatRelax_MPIAIJ,
1443        MatTranspose_MPIAIJ,
1444        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1445        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1446        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1447        0,
1448        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1449        0,0,0,0,
1450        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1451        0,0,
1452        0,0,MatConvertSameType_MPIAIJ,0,0,
1453        0,0,0,
1454        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1455        MatPrintHelp_MPIAIJ,
1456        MatScale_MPIAIJ,0,0,0,
1457        MatGetBlockSize_MPIAIJ,0,0,0,0,
1458        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1459 
1460 
1461 #undef __FUNC__
1462 #define __FUNC__ "MatCreateMPIAIJ"
1463 /*@C
1464    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1465    (the default parallel PETSc format).  For good matrix assembly performance
1466    the user should preallocate the matrix storage by setting the parameters
1467    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1468    performance can be increased by more than a factor of 50.
1469 
1470    Input Parameters:
1471 .  comm - MPI communicator
1472 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1473            This value should be the same as the local size used in creating the
1474            y vector for the matrix-vector product y = Ax.
1475 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1476        This value should be the same as the local size used in creating the x vector
1477        for the matrix-vector product y = Ax. For square matrices n is almost always m.
1478 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1479 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1480 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1481            (same for all local rows)
1482 .  d_nnz - array containing the number of nonzeros in the various rows of the
1483            diagonal portion of the local submatrix (possibly different for each row)
1484            or PETSC_NULL. You must leave room for the diagonal entry even if
1485            it is zero.
1486 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1487            submatrix (same for all local rows).
1488 .  o_nnz - array containing the number of nonzeros in the various rows of the
1489            off-diagonal portion of the local submatrix (possibly different for
1490            each row) or PETSC_NULL.
1491 
1492    Output Parameter:
1493 .  A - the matrix
1494 
1495    Notes:
1496    The AIJ format (also called the Yale sparse matrix format or
1497    compressed row storage) is fully compatible with standard Fortran 77
1498    storage.  That is, the stored row and column indices can begin at
1499    either one (as in Fortran) or zero.  See the users' manual for details.
1500 
1501    The user MUST specify either the local or global matrix dimensions
1502    (possibly both).
1503 
1504    By default, this format uses inodes (identical nodes) when possible.
1505    We search for consecutive rows with the same nonzero structure, thereby
1506    reusing matrix information to achieve increased efficiency.
1507 
1508    Options Database Keys:
1509 $    -mat_aij_no_inode  - Do not use inodes
1510 $    -mat_aij_inode_limit <limit> - Set inode limit.
1511 $        (max limit=5)
1512 $    -mat_aij_oneindex - Internally use indexing starting at 1
1513 $        rather than 0.  Note: When calling MatSetValues(),
1514 $        the user still MUST index entries starting at 0!
1515 
1516    Storage Information:
1517    For a square global matrix we define each processor's diagonal portion
1518    to be its local rows and the corresponding columns (a square submatrix);
1519    each processor's off-diagonal portion encompasses the remainder of the
1520    local matrix (a rectangular submatrix).
1521 
1522    The user can specify preallocated storage for the diagonal part of
1523    the local submatrix with either d_nz or d_nnz (not both).  Set
1524    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1525    memory allocation.  Likewise, specify preallocated storage for the
1526    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1527 
1528    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1529    the figure below we depict these three local rows and all columns (0-11).
1530 
1531 $          0 1 2 3 4 5 6 7 8 9 10 11
1532 $         -------------------
1533 $  row 3  |  o o o d d d o o o o o o
1534 $  row 4  |  o o o d d d o o o o o o
1535 $  row 5  |  o o o d d d o o o o o o
1536 $         -------------------
1537 $
1538 
1539    Thus, any entries in the d locations are stored in the d (diagonal)
1540    submatrix, and any entries in the o locations are stored in the
1541    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1542    stored simply in the MATSEQAIJ format for compressed row storage.
1543 
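   For example, if every location depicted above held a nonzero, this process
   would preallocate with d_nz=3 and o_nz=9 (equivalently, d_nnz[i]=3 and
   o_nnz[i]=9 for each of its three local rows).
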
1544    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1545    and o_nz should indicate the number of nonzeros per row in the o matrix.
1546    In general, for PDE problems in which most nonzeros are near the diagonal,
1547    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1548    or you will get TERRIBLE performance; see the users' manual chapter on
1549    matrices.
1550 
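   Example usage:
   The following is a minimal sketch (it is not taken from an actual PETSc
   example program) of creating and assembling a distributed tridiagonal
   matrix with exact preallocation; the communicator comm and the global
   dimension M are assumed to be provided by the caller.
$      Mat    A;
$      int    ierr, i, nc, Istart, Iend, cols[3];
$      Scalar vals[3];
$
$      /* comm (MPI communicator) and M (global size) are assumed set by the caller.
$         Every row has at most 3 nonzeros; only the first and last local rows
$         reach one column outside the diagonal block, hence d_nz=3, o_nz=1 */
$      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,
$                             1,PETSC_NULL,&A); CHKERRQ(ierr);
$      ierr = MatGetOwnershipRange(A,&Istart,&Iend); CHKERRQ(ierr);
$      for ( i=Istart; i<Iend; i++ ) {
$        nc = 0;
$        if (i > 0)   {cols[nc] = i-1; vals[nc] = -1.0; nc++;}
$        cols[nc] = i; vals[nc] = 2.0; nc++;
$        if (i < M-1) {cols[nc] = i+1; vals[nc] = -1.0; nc++;}
$        ierr = MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
$      }
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
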
1551 .keywords: matrix, aij, compressed row, sparse, parallel
1552 
1553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1554 @*/
1555 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1556                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1557 {
1558   Mat          B;
1559   Mat_MPIAIJ   *b;
1560   int          ierr, i,sum[2],work[2],size;
1561 
1562   MPI_Comm_size(comm,&size);
1563   if (size == 1) {
1564     if (M == PETSC_DECIDE) M = m;
1565     if (N == PETSC_DECIDE) N = n;
1566     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1567     return 0;
1568   }
1569 
1570   *A = 0;
1571   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1572   PLogObjectCreate(B);
1573   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1574   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1575   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1576   B->destroy    = MatDestroy_MPIAIJ;
1577   B->view       = MatView_MPIAIJ;
1578   B->factor     = 0;
1579   B->assembled  = PETSC_FALSE;
1580   B->mapping    = 0;
1581 
1582   B->insertmode = NOT_SET_VALUES;
1583   b->size       = size;
1584   MPI_Comm_rank(comm,&b->rank);
1585 
1586   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1587     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1588 
1589   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1590     work[0] = m; work[1] = n;
1591     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1592     if (M == PETSC_DECIDE) M = sum[0];
1593     if (N == PETSC_DECIDE) N = sum[1];
1594   }
1595   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} /* even split; first M%size ranks get one extra row */
1596   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} /* same split for the local column dimension */
1597   b->m = m; B->m = m;
1598   b->n = n; B->n = n;
1599   b->N = N; B->N = N;
1600   b->M = M; B->M = M;
1601 
1602   /* build local table of row and column ownerships */
1603   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1604   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1605   b->cowners = b->rowners + b->size + 2;
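  /* gather each process's local row count, then convert to a running sum so that
     rows [rowners[rank], rowners[rank+1]) belong to this process (columns likewise below) */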
1606   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1607   b->rowners[0] = 0;
1608   for ( i=2; i<=b->size; i++ ) {
1609     b->rowners[i] += b->rowners[i-1];
1610   }
1611   b->rstart = b->rowners[b->rank];
1612   b->rend   = b->rowners[b->rank+1];
1613   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1614   b->cowners[0] = 0;
1615   for ( i=2; i<=b->size; i++ ) {
1616     b->cowners[i] += b->cowners[i-1];
1617   }
1618   b->cstart = b->cowners[b->rank];
1619   b->cend   = b->cowners[b->rank+1];
1620 
1621   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1622   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1623   PLogObjectParent(B,b->A);
1624   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1625   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1626   PLogObjectParent(B,b->B);
1627 
1628   /* build cache (stash) for entries destined for rows owned by other processors */
1629   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1630   b->donotstash  = 0;
1631   b->colmap      = 0;
1632   b->garray      = 0;
1633   b->roworiented = 1;
1634 
1635   /* stuff used for matrix vector multiply */
1636   b->lvec      = 0;
1637   b->Mvctx     = 0;
1638 
1639   /* stuff for MatGetRow() */
1640   b->rowindices   = 0;
1641   b->rowvalues    = 0;
1642   b->getrowactive = PETSC_FALSE;
1643 
1644   *A = B;
1645   return 0;
1646 }
1647 
1648 #undef __FUNC__
1649 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1650 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1651 {
1652   Mat        mat;
1653   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1654   int        ierr, len=0, flg;
1655 
1656   *newmat       = 0;
1657   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1658   PLogObjectCreate(mat);
1659   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1660   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1661   mat->destroy    = MatDestroy_MPIAIJ;
1662   mat->view       = MatView_MPIAIJ;
1663   mat->factor     = matin->factor;
1664   mat->assembled  = PETSC_TRUE;
1665 
1666   a->m = mat->m   = oldmat->m;
1667   a->n = mat->n   = oldmat->n;
1668   a->M = mat->M   = oldmat->M;
1669   a->N = mat->N   = oldmat->N;
1670 
1671   a->rstart       = oldmat->rstart;
1672   a->rend         = oldmat->rend;
1673   a->cstart       = oldmat->cstart;
1674   a->cend         = oldmat->cend;
1675   a->size         = oldmat->size;
1676   a->rank         = oldmat->rank;
1677   mat->insertmode = NOT_SET_VALUES;
1678   a->rowvalues    = 0;
1679   a->getrowactive = PETSC_FALSE;
1680 
1681   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1682   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1683   a->cowners = a->rowners + a->size + 2;
1684   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1685   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1686   if (oldmat->colmap) {
1687     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1688     PLogObjectMemory(mat,(a->N)*sizeof(int));
1689     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1690   } else a->colmap = 0;
1691   if (oldmat->garray) {
1692     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1693     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1694     PLogObjectMemory(mat,len*sizeof(int));
1695     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1696   } else a->garray = 0;
1697 
1698   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1699   PLogObjectParent(mat,a->lvec);
1700   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1701   PLogObjectParent(mat,a->Mvctx);
1702   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1703   PLogObjectParent(mat,a->A);
1704   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1705   PLogObjectParent(mat,a->B);
1706   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1707   if (flg) {
1708     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1709   }
1710   *newmat = mat;
1711   return 0;
1712 }
1713 
1714 #include "sys.h"
1715 
1716 #undef __FUNC__
1717 #define __FUNC__ "MatLoad_MPIAIJ"
1718 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1719 {
1720   Mat          A;
1721   int          i, nz, ierr, j,rstart, rend, fd;
1722   Scalar       *vals,*svals;
1723   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1724   MPI_Status   status;
1725   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1726   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1727   int          tag = ((PetscObject)viewer)->tag;
1728 
1729   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1730   if (!rank) {
1731     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1732     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1733     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1734   }
1735 
1736   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1737   M = header[1]; N = header[2];
1738   /* determine ownership of all rows: M/size rows per process, with the first M%size processes taking one extra row */
1739   m = M/size + ((M % size) > rank);
1740   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1741   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1742   rowners[0] = 0;
1743   for ( i=2; i<=size; i++ ) {
1744     rowners[i] += rowners[i-1];
1745   }
1746   rstart = rowners[rank];
1747   rend   = rowners[rank+1];
1748 
1749   /* distribute row lengths to all processors */
1750   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1751   offlens = ourlens + (rend-rstart);
1752   if (!rank) {
1753     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1754     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1755     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1756     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1757     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1758     PetscFree(sndcounts);
1759   }
1760   else {
1761     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1762   }
1763 
1764   if (!rank) {
1765     /* calculate the number of nonzeros on each processor */
1766     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1767     PetscMemzero(procsnz,size*sizeof(int));
1768     for ( i=0; i<size; i++ ) {
1769       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1770         procsnz[i] += rowlengths[j];
1771       }
1772     }
1773     PetscFree(rowlengths);
1774 
1775     /* determine max buffer needed and allocate it */
1776     maxnz = 0;
1777     for ( i=0; i<size; i++ ) {
1778       maxnz = PetscMax(maxnz,procsnz[i]);
1779     }
1780     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1781 
1782     /* read in my part of the matrix column indices  */
1783     nz = procsnz[0];
1784     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1785     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1786 
1787     /* read in everyone else's column indices and ship them off */
1788     for ( i=1; i<size; i++ ) {
1789       nz = procsnz[i];
1790       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1791       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1792     }
1793     PetscFree(cols);
1794   }
1795   else {
1796     /* determine buffer space needed for message */
1797     nz = 0;
1798     for ( i=0; i<m; i++ ) {
1799       nz += ourlens[i];
1800     }
1801     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1802 
1803     /* receive message of column indices*/
1804     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1805     MPI_Get_count(&status,MPI_INT,&maxnz);
1806     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1807   }
1808 
1809   /* loop over local rows, determining number of off-diagonal entries */
1810   PetscMemzero(offlens,m*sizeof(int));
1811   jj = 0;
1812   for ( i=0; i<m; i++ ) {
1813     for ( j=0; j<ourlens[i]; j++ ) {
1814       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1815       jj++;
1816     }
1817   }
1818 
1819   /* create our matrix */
1820   for ( i=0; i<m; i++ ) {
1821     ourlens[i] -= offlens[i];
1822   }
1823   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1824   A = *newmat;
1825   MatSetOption(A,MAT_COLUMNS_SORTED);
1826   for ( i=0; i<m; i++ ) {
1827     ourlens[i] += offlens[i];
1828   }
1829 
1830   if (!rank) {
1831     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1832 
1833     /* read in my part of the matrix numerical values  */
1834     nz = procsnz[0];
1835     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1836 
1837     /* insert into matrix */
1838     jj      = rstart;
1839     smycols = mycols;
1840     svals   = vals;
1841     for ( i=0; i<m; i++ ) {
1842       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1843       smycols += ourlens[i];
1844       svals   += ourlens[i];
1845       jj++;
1846     }
1847 
1848     /* read in the other processors' values and ship them out */
1849     for ( i=1; i<size; i++ ) {
1850       nz = procsnz[i];
1851       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1852       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1853     }
1854     PetscFree(procsnz);
1855   }
1856   else {
1857     /* receive numeric values */
1858     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1859 
1860     /* receive message of values*/
1861     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1862     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1863     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1864 
1865     /* insert into matrix */
1866     jj      = rstart;
1867     smycols = mycols;
1868     svals   = vals;
1869     for ( i=0; i<m; i++ ) {
1870       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1871       smycols += ourlens[i];
1872       svals   += ourlens[i];
1873       jj++;
1874     }
1875   }
1876   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1877 
1878   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1879   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1880   return 0;
1881 }
1882