xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 98166b15b11922357fb11bb2cf7cf9eb33ae6adf)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.223 1997/10/29 14:07:25 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way, since the
13 length of colmap equals the global matrix length.
14 */
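/*
   An illustrative example (not library code): if the off-diagonal block
   aij->B uses the global columns garray[] = {3, 7, 12}, then after
   CreateColmap_MPIAIJ_Private()

       colmap[3] = 1,  colmap[7] = 2,  colmap[12] = 3,

   and every other entry of colmap[] is 0.  A lookup is therefore

       local = aij->colmap[global] - 1;

   where local == -1 means "column not present in B".  This is how
   MatSetValues_MPIAIJ() and MatAssemblyEnd_MPIAIJ() below translate
   global column numbers into the local numbering of aij->B.
*/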
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   PetscFunctionBegin;
24   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
25   PLogObjectMemory(mat,aij->N*sizeof(int));
26   PetscMemzero(aij->colmap,aij->N*sizeof(int));
27   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
28   PetscFunctionReturn(0);
29 }
30 
31 extern int DisAssemble_MPIAIJ(Mat);
32 
33 #define CHUNKSIZE   15
34 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
35 { \
36  \
37     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
38     rmax = aimax[row]; nrow = ailen[row];  \
39     col1 = col - shift; \
40      \
41     low = 0; high = nrow; \
42     while (high-low > 5) { \
43       t = (low+high)/2; \
44       if (rp[t] > col1) high = t; \
45       else              low  = t; \
46     } \
47       for ( _i=low; _i<nrow; _i++ ) { \
48         if (rp[_i] > col1) break; \
49         if (rp[_i] == col1) { \
50           if (addv == ADD_VALUES) ap[_i] += value;   \
51           else                  ap[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (nonew == 1) goto a_noinsert; \
56       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
57       if (nrow >= rmax) { \
58         /* there is no extra room in row, therefore enlarge */ \
59         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
60         Scalar *new_a; \
61  \
62         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
63  \
64         /* malloc new storage space */ \
65         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
66         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
67         new_j   = (int *) (new_a + new_nz); \
68         new_i   = new_j + new_nz; \
69  \
70         /* copy over old data into new slots */ \
71         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
72         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
73         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
74         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
75         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
76                                                            len*sizeof(int)); \
77         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
78         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
79                                                            len*sizeof(Scalar));  \
80         /* free up old matrix storage */ \
81  \
82         PetscFree(a->a);  \
83         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
84         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
85         a->singlemalloc = 1; \
86  \
87         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
88         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
89         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
90         a->maxnz += CHUNKSIZE; \
91         a->reallocs++; \
92       } \
93       N = nrow++ - 1; a->nz++; \
94       /* shift up all the later entries in this row */ \
95       for ( ii=N; ii>=_i; ii-- ) { \
96         rp[ii+1] = rp[ii]; \
97         ap[ii+1] = ap[ii]; \
98       } \
99       rp[_i] = col1;  \
100       ap[_i] = value;  \
101       a_noinsert: ; \
102       ailen[row] = nrow; \
103 }
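/*
   For reference, a standalone sketch of the insertion logic the macro
   above implements (illustrative only: it assumes 0-based indexing,
   i.e. shift == 0, and omits the reallocation branch).  The macro form
   is used to avoid call overhead in the MatSetValues() inner loop.

   static void InsertIntoSortedRow(int *rp,Scalar *ap,int *nrow,
                                   int col,Scalar value,InsertMode addv)
   {
     int i,t,low = 0,high = *nrow,N;

     while (high - low > 5) {             .. binary search narrows range
       t = (low + high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (i=low; i<*nrow; i++) {
       if (rp[i] > col) break;            .. insertion point found
       if (rp[i] == col) {                .. existing nonzero: update it
         if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
         return;
       }
     }
     for (N=*nrow-1; N>=i; N--) {         .. shift later entries up
       rp[N+1] = rp[N]; ap[N+1] = ap[N];
     }
     rp[i] = col; ap[i] = value; (*nrow)++;
   }
*/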
104 
105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
106 { \
107  \
108     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
109     rmax = bimax[row]; nrow = bilen[row];  \
110     col1 = col - shift; \
111      \
112     low = 0; high = nrow; \
113     while (high-low > 5) { \
114       t = (low+high)/2; \
115       if (rp[t] > col1) high = t; \
116       else              low  = t; \
117     } \
118       for ( _i=low; _i<nrow; _i++ ) { \
119         if (rp[_i] > col1) break; \
120         if (rp[_i] == col1) { \
121           if (addv == ADD_VALUES) ap[_i] += value;   \
122           else                  ap[_i] = value; \
123           goto b_noinsert; \
124         } \
125       }  \
126       if (nonew == 1) goto b_noinsert; \
127       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
128       if (nrow >= rmax) { \
129         /* there is no extra room in row, therefore enlarge */ \
130         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
131         Scalar *new_a; \
132  \
133         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
134  \
135         /* malloc new storage space */ \
136         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
137         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
138         new_j   = (int *) (new_a + new_nz); \
139         new_i   = new_j + new_nz; \
140  \
141         /* copy over old data into new slots */ \
142         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
143         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
144         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
145         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
146         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
147                                                            len*sizeof(int)); \
148         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
149         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
150                                                            len*sizeof(Scalar));  \
151         /* free up old matrix storage */ \
152  \
153         PetscFree(b->a);  \
154         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
155         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
156         b->singlemalloc = 1; \
157  \
158         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
159         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
160         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
161         b->maxnz += CHUNKSIZE; \
162         b->reallocs++; \
163       } \
164       N = nrow++ - 1; b->nz++; \
165       /* shift up all the later entries in this row */ \
166       for ( ii=N; ii>=_i; ii-- ) { \
167         rp[ii+1] = rp[ii]; \
168         ap[ii+1] = ap[ii]; \
169       } \
170       rp[_i] = col1;  \
171       ap[_i] = value;  \
172       b_noinsert: ; \
173       bilen[row] = nrow; \
174 }
175 
176 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
177 #undef __FUNC__
178 #define __FUNC__ "MatSetValues_MPIAIJ"
179 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
180 {
181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
182   Scalar     value;
183   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
184   int        cstart = aij->cstart, cend = aij->cend,row,col;
185   int        roworiented = aij->roworiented;
186 
187   /* Variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
188   Mat        A = aij->A;
189   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
190   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
191   Scalar     *aa = a->a;
192 
193   Mat        B = aij->B;
194   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
195   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
196   Scalar     *ba = b->a;
197 
198   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
199   int        nonew = a->nonew,shift = a->indexshift;
200   Scalar     *ap;
201 
202   PetscFunctionBegin;
203   for ( i=0; i<m; i++ ) {
204 #if defined(USE_PETSC_BOPT_g)
205     if (im[i] < 0) SETERRQ(1,0,"Negative row");
206     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
207 #endif
208     if (im[i] >= rstart && im[i] < rend) {
209       row = im[i] - rstart;
210       for ( j=0; j<n; j++ ) {
211         if (in[j] >= cstart && in[j] < cend){
212           col = in[j] - cstart;
213           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
214           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
215           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
216         }
217 #if defined(USE_PETSC_BOPT_g)
218         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
219         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
220 #endif
221         else {
222           if (mat->was_assembled) {
223             if (!aij->colmap) {
224               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
225             }
226             col = aij->colmap[in[j]] - 1;
227             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
228               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
229               col =  in[j];
230               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
231               B = aij->B;
232               b = (Mat_SeqAIJ *) B->data;
233               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
234               ba = b->a;
235             }
236           }
237           else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     }
294     else {
295       SETERRQ(1,0,"Only local values currently supported");
296     }
297   }
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNC__
302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
304 {
305   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
306   MPI_Comm    comm = mat->comm;
307   int         size = aij->size, *owners = aij->rowners;
308   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
309   MPI_Request *send_waits,*recv_waits;
310   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
311   InsertMode  addv;
312   Scalar      *rvalues,*svalues;
313 
314   PetscFunctionBegin;
315   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
316   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
317   if (addv == (ADD_VALUES|INSERT_VALUES)) {
318     SETERRQ(1,0,"Some processors inserted others added");
319   }
320   mat->insertmode = addv; /* in case this processor had no cache */
321 
322   /*  first count number of contributors to each processor */
323   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
324   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
325   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
326   for ( i=0; i<aij->stash.n; i++ ) {
327     idx = aij->stash.idx[i];
328     for ( j=0; j<size; j++ ) {
329       if (idx >= owners[j] && idx < owners[j+1]) {
330         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
331       }
332     }
333   }
334   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
335 
336   /* inform other processors of number of messages and max length */
337   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
338   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
339   nreceives = work[rank];
340   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
341   nmax = work[rank];
342   PetscFree(work);
343 
344   /* post receives:
345        1) each message will consist of ordered triples
346      (global row index, global column index, value); we store the
347      indices as Scalars to simplify the message passing.
348        2) since we don't know how long each individual message is we
349      allocate the largest needed buffer for each receive. Potentially
350      this is a lot of wasted space.
351 
352 
353        This could be done better.
354   */
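  /*
     Message format (illustrative): an entry k destined for processor p
     is packed below as three consecutive Scalars,

         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] = value

     so a message carrying the two entries (4,9,1.5) and (4,11,-2.0) is
     the 6-Scalar stream {4, 9, 1.5, 4, 11, -2.0}.  Hence nmax counts
     entries, and each receive buffer below holds 3*nmax Scalars.
  */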
355   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
357   for ( i=0; i<nreceives; i++ ) {
358     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
359               comm,recv_waits+i);CHKERRQ(ierr);
360   }
361 
362   /* do sends:
363       1) starts[i] gives the starting index in svalues for stuff going to
364          the ith processor
365   */
366   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
367   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
368   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
369   starts[0]  = 0;
370   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
371   for ( i=0; i<aij->stash.n; i++ ) {
372     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
373     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
374     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
375   }
376   PetscFree(owner);
377   starts[0] = 0;
378   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   count = 0;
380   for ( i=0; i<size; i++ ) {
381     if (procs[i]) {
382       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
383     }
384   }
385   PetscFree(starts); PetscFree(nprocs);
386 
387   /* Free cache space */
388   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
389   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
390 
391   aij->svalues    = svalues;    aij->rvalues    = rvalues;
392   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
393   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
394   aij->rmax       = nmax;
395 
396   PetscFunctionReturn(0);
397 }
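/*
   A sketch of the counting protocol used in MatAssemblyBegin_MPIAIJ()
   above (for exposition only; MatZeroRows_MPIAIJ() below reuses the
   same pattern).  Each processor fills procs[] (0/1: "do I send to j?")
   and nprocs[] (how many entries go to j); two reductions then tell
   everyone what to expect:

       MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
       nreceives = work[rank];    .. number of incoming messages
       MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
       nmax      = work[rank];    .. no message sent to me is longer

   For example, on 3 processors where only ranks 0 and 2 send to rank 1,
   rank 1 finds work[1] = 2 after the first reduction and posts two
   MPI_Irecv()s of 3*nmax Scalars each.
*/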
398 extern int MatSetUpMultiply_MPIAIJ(Mat);
399 
400 #undef __FUNC__
401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
403 {
404   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
405   MPI_Status  *send_status,recv_status;
406   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
407   int         row,col,other_disassembled;
408   Scalar      *values,val;
409   InsertMode  addv = mat->insertmode;
410 
411   PetscFunctionBegin;
412   /*  wait on receives */
413   while (count) {
414     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
415     /* unpack receives into our local space */
416     values = aij->rvalues + 3*imdex*aij->rmax;
417     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
418     n = n/3;
419     for ( i=0; i<n; i++ ) {
420       row = (int) PetscReal(values[3*i]) - aij->rstart;
421       col = (int) PetscReal(values[3*i+1]);
422       val = values[3*i+2];
423       if (col >= aij->cstart && col < aij->cend) {
424         col -= aij->cstart;
425         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
426       } else {
427         if (mat->was_assembled || mat->assembled) {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
430           }
431           col = aij->colmap[col] - 1;
432           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
433             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
434             col = (int) PetscReal(values[3*i+1]);
435           }
436         }
437         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
438       }
439     }
440     count--;
441   }
442   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
443 
444   /* wait on sends */
445   if (aij->nsends) {
446     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
447     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
448     PetscFree(send_status);
449   }
450   PetscFree(aij->send_waits); PetscFree(aij->svalues);
451 
452   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
454 
455   /* determine if any processor has disassembled; if so, we must
456      also disassemble ourselves, in order that we may reassemble. */
457   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
458   if (mat->was_assembled && !other_disassembled) {
459     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460   }
461 
462   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
463     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
464   }
465   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
467 
468   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatZeroEntries_MPIAIJ"
474 int MatZeroEntries_MPIAIJ(Mat A)
475 {
476   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478 
479   PetscFunctionBegin;
480   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
481   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 /* the code does not handle the diagonal entries correctly unless the
486    matrix is square and the column and row ownerships are identical.
487    This is a BUG. The only way to fix it seems to be to access
488    aij->A and aij->B directly and not through the MatZeroRows()
489    routine.
490 */
491 #undef __FUNC__
492 #define __FUNC__ "MatZeroRows_MPIAIJ"
493 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
494 {
495   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
496   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
497   int            *procs,*nprocs,j,found,idx,nsends,*work;
498   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
499   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
500   int            *lens,imdex,*lrows,*values;
501   MPI_Comm       comm = A->comm;
502   MPI_Request    *send_waits,*recv_waits;
503   MPI_Status     recv_status,*send_status;
504   IS             istmp;
505 
506   PetscFunctionBegin;
507   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
508   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
509 
510   /*  first count number of contributors to each processor */
511   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
512   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
513   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
514   for ( i=0; i<N; i++ ) {
515     idx = rows[i];
516     found = 0;
517     for ( j=0; j<size; j++ ) {
518       if (idx >= owners[j] && idx < owners[j+1]) {
519         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
520       }
521     }
522     if (!found) SETERRQ(1,0,"Index out of range");
523   }
524   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
525 
526   /* inform other processors of number of messages and max length */
527   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
528   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
529   nrecvs = work[rank];
530   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
531   nmax = work[rank];
532   PetscFree(work);
533 
534   /* post receives:   */
535   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
536   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
537   for ( i=0; i<nrecvs; i++ ) {
538     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
539   }
540 
541   /* do sends:
542       1) starts[i] gives the starting index in svalues for stuff going to
543          the ith processor
544   */
545   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
546   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
547   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
548   starts[0] = 0;
549   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
550   for ( i=0; i<N; i++ ) {
551     svalues[starts[owner[i]]++] = rows[i];
552   }
553   ISRestoreIndices(is,&rows);
554 
555   starts[0] = 0;
556   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
557   count = 0;
558   for ( i=0; i<size; i++ ) {
559     if (procs[i]) {
560       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
561     }
562   }
563   PetscFree(starts);
564 
565   base = owners[rank];
566 
567   /*  wait on receives */
568   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
569   source = lens + nrecvs;
570   count  = nrecvs; slen = 0;
571   while (count) {
572     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
573     /* unpack receives into our local space */
574     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
575     source[imdex]  = recv_status.MPI_SOURCE;
576     lens[imdex]  = n;
577     slen += n;
578     count--;
579   }
580   PetscFree(recv_waits);
581 
582   /* move the data into the send scatter */
583   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
584   count = 0;
585   for ( i=0; i<nrecvs; i++ ) {
586     values = rvalues + i*nmax;
587     for ( j=0; j<lens[i]; j++ ) {
588       lrows[count++] = values[j] - base;
589     }
590   }
591   PetscFree(rvalues); PetscFree(lens);
592   PetscFree(owner); PetscFree(nprocs);
593 
594   /* actually zap the local rows */
595   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
596   PLogObjectParent(A,istmp);
597   PetscFree(lrows);
598   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
599   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
600   ierr = ISDestroy(istmp); CHKERRQ(ierr);
601 
602   /* wait on sends */
603   if (nsends) {
604     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
605     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
606     PetscFree(send_status);
607   }
608   PetscFree(send_waits); PetscFree(svalues);
609 
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatMult_MPIAIJ"
615 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
618   int        ierr,nt;
619 
620   PetscFunctionBegin;
621   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
622   if (nt != a->n) {
623     SETERRQ(1,0,"Incompatible partition of A and xx");
624   }
625   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
626   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
627   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
628   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
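/*
   In block form the multiply above computes

       y = A_d x_local + B_o x_ghost

   where A_d (a->A) is the diagonal block, B_o (a->B) is the
   off-diagonal block stored with compressed local column numbering,
   and x_ghost (a->lvec) is filled by the scatter.  Starting the
   scatter before the local product and ending it after overlaps the
   communication for x_ghost with the computation of A_d x_local.
*/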
631 
632 #undef __FUNC__
633 #define __FUNC__ "MatMultAdd_MPIAIJ"
634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
635 {
636   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
637   int        ierr;
638 
639   PetscFunctionBegin;
640   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
641   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
642   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
643   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNC__
648 #define __FUNC__ "MatMultTrans_MPIAIJ"
649 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
650 {
651   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
652   int        ierr;
653 
654   PetscFunctionBegin;
655   /* do nondiagonal part */
656   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
657   /* send it on its way */
658   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
659   /* do local part */
660   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
661   /* receive remote parts: note this assumes the values are not actually */
662   /* inserted in yy until the next line, which is true for this          */
663   /* implementation but may not always be true.                          */
664   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
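/*
   The transpose product reverses the MatMult pattern: each processor
   computes lvec = B_o^T x locally, ships those partial sums back to
   their owning processors with a reverse scatter in ADD_VALUES mode,
   and adds them to its own y = A_d^T x.  The comment above records the
   assumption this overlap relies on: the scattered values must not
   land in yy before VecScatterEnd().
*/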
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
673   int        ierr;
674 
675   PetscFunctionBegin;
676   /* do nondiagonal part */
677   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
678   /* send it on its way */
679   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
680   /* do local part */
681   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
682   /* receive remote parts: note this assumes the values are not actually */
683   /* inserted in zz until the next line, which is true for this          */
684   /* implementation but may not always be true.                          */
685   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 /*
690   This only works correctly for square matrices where the subblock A->A is the
691    diagonal block
692 */
693 #undef __FUNC__
694 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
695 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
696 {
697   int        ierr;
698   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
699 
700   PetscFunctionBegin;
701   if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
702   if (a->rstart != a->cstart || a->rend != a->cend) {
703     SETERRQ(1,0,"row partition must equal col partition");
704   }
705   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNC__
710 #define __FUNC__ "MatScale_MPIAIJ"
711 int MatScale_MPIAIJ(Scalar *aa,Mat A)
712 {
713   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
714   int        ierr;
715 
716   PetscFunctionBegin;
717   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
718   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNC__
723 #define __FUNC__ "MatDestroy_MPIAIJ"
724 int MatDestroy_MPIAIJ(PetscObject obj)
725 {
726   Mat        mat = (Mat) obj;
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
728   int        ierr;
729 
730   PetscFunctionBegin;
731 #if defined(USE_PETSC_LOG)
732   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
733 #endif
734   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
735   PetscFree(aij->rowners);
736   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
738   if (aij->colmap) PetscFree(aij->colmap);
739   if (aij->garray) PetscFree(aij->garray);
740   if (aij->lvec)   VecDestroy(aij->lvec);
741   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
742   if (aij->rowvalues) PetscFree(aij->rowvalues);
743   PetscFree(aij);
744   PLogObjectDestroy(mat);
745   PetscHeaderDestroy(mat);
746   PetscFunctionReturn(0);
747 }
748 
749 #undef __FUNC__
750 #define __FUNC__ "MatView_MPIAIJ_Binary"
751 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
752 {
753   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
754   int         ierr;
755 
756   PetscFunctionBegin;
757   if (aij->size == 1) {
758     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
759   }
760   else SETERRQ(1,0,"Only uniprocessor output supported");
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNC__
765 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
766 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
767 {
768   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
769   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
770   int         ierr, format,shift = C->indexshift,rank;
771   FILE        *fd;
772   ViewerType  vtype;
773 
774   PetscFunctionBegin;
775   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
776   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
777     ierr = ViewerGetFormat(viewer,&format);
778     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
779       MatInfo info;
780       int     flg;
781       MPI_Comm_rank(mat->comm,&rank);
782       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
783       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
784       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
785       PetscSequentialPhaseBegin(mat->comm,1);
786       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
787          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
788       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
789          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
790       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
791       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
792       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
793       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
794       fflush(fd);
795       PetscSequentialPhaseEnd(mat->comm,1);
796       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
797       PetscFunctionReturn(0);
798     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
799       PetscFunctionReturn(0);
800     }
801   }
802 
803   if (vtype == DRAW_VIEWER) {
804     Draw       draw;
805     PetscTruth isnull;
806     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
807     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
808   }
809 
810   if (vtype == ASCII_FILE_VIEWER) {
811     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
812     PetscSequentialPhaseBegin(mat->comm,1);
813     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
814            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
815            aij->cend);
816     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
817     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
818     fflush(fd);
819     PetscSequentialPhaseEnd(mat->comm,1);
820   } else {
821     int size = aij->size;
822     rank = aij->rank;
823     if (size == 1) {
824       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
825     } else {
826       /* assemble the entire matrix onto first processor. */
827       Mat         A;
828       Mat_SeqAIJ *Aloc;
829       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
830       Scalar      *a;
831 
832       if (!rank) {
833         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
834                CHKERRQ(ierr);
835       } else {
836         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
837                CHKERRQ(ierr);
838       }
839       PLogObjectParent(mat,A);
840 
841       /* copy over the A part */
842       Aloc = (Mat_SeqAIJ*) aij->A->data;
843       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
844       row = aij->rstart;
845       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
846       for ( i=0; i<m; i++ ) {
847         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
848         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
849       }
850       aj = Aloc->j;
851       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
852 
853       /* copy over the B part */
854       Aloc = (Mat_SeqAIJ*) aij->B->data;
855       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
856       row = aij->rstart;
857       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
858       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
859       for ( i=0; i<m; i++ ) {
860         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
861         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
862       }
863       PetscFree(ct);
864       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
865       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
866       if (!rank) {
867         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
868       }
869       ierr = MatDestroy(A); CHKERRQ(ierr);
870     }
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNC__
876 #define __FUNC__ "MatView_MPIAIJ"
877 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
878 {
879   Mat         mat = (Mat) obj;
880   int         ierr;
881   ViewerType  vtype;
882 
883   PetscFunctionBegin;
884   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
885   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
886       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
887     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
888   }
889   else if (vtype == BINARY_FILE_VIEWER) {
890     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
891   }
892   PetscFunctionReturn(0);
893 }
894 
895 /*
896     This has to provide several versions.
897 
898      2) a) use only local smoothing, updating outer values only once.
899         b) local smoothing, updating outer values each inner iteration.
900      3) coloring, updating outer values between colors.
901 */
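/*
   The per-row update implemented below is a standard local SOR sweep.
   With d_i = a_ii + fshift, and the SPARSEDENSEMDOT() macros reducing
   sum to  b_i - sum_j a_ij x_j  over both the diagonal (A) and
   off-diagonal (B) blocks, each row is updated as

       x_i <- (1 - omega) x_i + omega (sum + a_ii x_i) / d_i

   where adding back the a_ii x_i term leaves b_i minus only the
   off-diagonal contributions in the numerator.
*/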
902 #undef __FUNC__
903 #define __FUNC__ "MatRelax_MPIAIJ"
904 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
905                            double fshift,int its,Vec xx)
906 {
907   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
908   Mat        AA = mat->A, BB = mat->B;
909   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
910   Scalar     *b,*x,*xs,*ls,d,*v,sum;
911   int        ierr,*idx, *diag;
912   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
913 
914   PetscFunctionBegin;
915   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
916   xs = x + shift; /* shift by one for index start of 1 */
917   ls = ls + shift;
918   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
919   diag = A->diag;
920   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
921     if (flag & SOR_ZERO_INITIAL_GUESS) {
922       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
923       PetscFunctionReturn(0);
924     }
925     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
926     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
927     while (its--) {
928       /* go down through the rows */
929       for ( i=0; i<m; i++ ) {
930         n    = A->i[i+1] - A->i[i];
931         idx  = A->j + A->i[i] + shift;
932         v    = A->a + A->i[i] + shift;
933         sum  = b[i];
934         SPARSEDENSEMDOT(sum,xs,v,idx,n);
935         d    = fshift + A->a[diag[i]+shift];
936         n    = B->i[i+1] - B->i[i];
937         idx  = B->j + B->i[i] + shift;
938         v    = B->a + B->i[i] + shift;
939         SPARSEDENSEMDOT(sum,ls,v,idx,n);
940         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
941       }
942       /* come up through the rows */
943       for ( i=m-1; i>-1; i-- ) {
944         n    = A->i[i+1] - A->i[i];
945         idx  = A->j + A->i[i] + shift;
946         v    = A->a + A->i[i] + shift;
947         sum  = b[i];
948         SPARSEDENSEMDOT(sum,xs,v,idx,n);
949         d    = fshift + A->a[diag[i]+shift];
950         n    = B->i[i+1] - B->i[i];
951         idx  = B->j + B->i[i] + shift;
952         v    = B->a + B->i[i] + shift;
953         SPARSEDENSEMDOT(sum,ls,v,idx,n);
954         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
955       }
956     }
957   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
958     if (flag & SOR_ZERO_INITIAL_GUESS) {
959       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
960       PetscFunctionReturn(0);
961     }
962     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
963     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
964     while (its--) {
965       for ( i=0; i<m; i++ ) {
966         n    = A->i[i+1] - A->i[i];
967         idx  = A->j + A->i[i] + shift;
968         v    = A->a + A->i[i] + shift;
969         sum  = b[i];
970         SPARSEDENSEMDOT(sum,xs,v,idx,n);
971         d    = fshift + A->a[diag[i]+shift];
972         n    = B->i[i+1] - B->i[i];
973         idx  = B->j + B->i[i] + shift;
974         v    = B->a + B->i[i] + shift;
975         SPARSEDENSEMDOT(sum,ls,v,idx,n);
976         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
977       }
978     }
979   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
980     if (flag & SOR_ZERO_INITIAL_GUESS) {
981       ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     }
984     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
985                             mat->Mvctx); CHKERRQ(ierr);
986     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
987                             mat->Mvctx); CHKERRQ(ierr);
988     while (its--) {
989       for ( i=m-1; i>-1; i-- ) {
990         n    = A->i[i+1] - A->i[i];
991         idx  = A->j + A->i[i] + shift;
992         v    = A->a + A->i[i] + shift;
993         sum  = b[i];
994         SPARSEDENSEMDOT(sum,xs,v,idx,n);
995         d    = fshift + A->a[diag[i]+shift];
996         n    = B->i[i+1] - B->i[i];
997         idx  = B->j + B->i[i] + shift;
998         v    = B->a + B->i[i] + shift;
999         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1000         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1001       }
1002     }
1003   } else {
1004     SETERRQ(1,0,"Option not supported");
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNC__
1010 #define __FUNC__ "MatGetInfo_MPIAIJ"
1011 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1012 {
1013   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1014   Mat        A = mat->A, B = mat->B;
1015   int        ierr;
1016   double     isend[5], irecv[5];
1017 
1018   PetscFunctionBegin;
1019   info->rows_global    = (double)mat->M;
1020   info->columns_global = (double)mat->N;
1021   info->rows_local     = (double)mat->m;
1022   info->columns_local  = (double)mat->N;
1023   info->block_size     = 1.0;
1024   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1025   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1026   isend[3] = info->memory;  isend[4] = info->mallocs;
1027   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1028   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1029   isend[3] += info->memory;  isend[4] += info->mallocs;
1030   if (flag == MAT_LOCAL) {
1031     info->nz_used      = isend[0];
1032     info->nz_allocated = isend[1];
1033     info->nz_unneeded  = isend[2];
1034     info->memory       = isend[3];
1035     info->mallocs      = isend[4];
1036   } else if (flag == MAT_GLOBAL_MAX) {
1037     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1038     info->nz_used      = irecv[0];
1039     info->nz_allocated = irecv[1];
1040     info->nz_unneeded  = irecv[2];
1041     info->memory       = irecv[3];
1042     info->mallocs      = irecv[4];
1043   } else if (flag == MAT_GLOBAL_SUM) {
1044     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1045     info->nz_used      = irecv[0];
1046     info->nz_allocated = irecv[1];
1047     info->nz_unneeded  = irecv[2];
1048     info->memory       = irecv[3];
1049     info->mallocs      = irecv[4];
1050   }
1051   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1052   info->fill_ratio_needed = 0;
1053   info->factor_mallocs    = 0;
1054 
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNC__
1059 #define __FUNC__ "MatSetOption_MPIAIJ"
1060 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1061 {
1062   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1063 
1064   PetscFunctionBegin;
1065   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1066       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1067       op == MAT_COLUMNS_UNSORTED ||
1068       op == MAT_COLUMNS_SORTED ||
1069       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1070       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1071         MatSetOption(a->A,op);
1072         MatSetOption(a->B,op);
1073   } else if (op == MAT_ROW_ORIENTED) {
1074     a->roworiented = 1;
1075     MatSetOption(a->A,op);
1076     MatSetOption(a->B,op);
1077   } else if (op == MAT_ROWS_SORTED ||
1078              op == MAT_ROWS_UNSORTED ||
1079              op == MAT_SYMMETRIC ||
1080              op == MAT_STRUCTURALLY_SYMMETRIC ||
1081              op == MAT_YES_NEW_DIAGONALS)
1082     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1083   else if (op == MAT_COLUMN_ORIENTED) {
1084     a->roworiented = 0;
1085     MatSetOption(a->A,op);
1086     MatSetOption(a->B,op);
1087   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1088     a->donotstash = 1;
1089   } else if (op == MAT_NO_NEW_DIAGONALS){
1090     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1091   } else {
1092     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1093   }
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNC__
1098 #define __FUNC__ "MatGetSize_MPIAIJ"
1099 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1100 {
1101   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1102 
1103   PetscFunctionBegin;
1104   if (m) *m = mat->M;
1105   if (n) *n = mat->N;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1111 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1112 {
1113   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1114 
1115   PetscFunctionBegin;
1116   if (m) *m = mat->m;
1117   if (n) *n = mat->N;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNC__
1122 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1123 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1124 {
1125   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1126 
1127   PetscFunctionBegin;
1128   *m = mat->rstart; *n = mat->rend;
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1133 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1134 
1135 #undef __FUNC__
1136 #define __FUNC__ "MatGetRow_MPIAIJ"
1137 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1138 {
1139   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1140   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1141   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1142   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1143   int        *cmap, *idx_p;
1144 
1145   PetscFunctionBegin;
1146   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1147   mat->getrowactive = PETSC_TRUE;
1148 
1149   if (!mat->rowvalues && (idx || v)) {
1150     /*
1151         allocate enough space to hold information from the longest row.
1152     */
1153     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1154     int     max = 1,m = mat->m,tmp;
1155     for ( i=0; i<m; i++ ) {
1156       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1157       if (max < tmp) { max = tmp; }
1158     }
1159     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1160     CHKPTRQ(mat->rowvalues);
1161     mat->rowindices = (int *) (mat->rowvalues + max);
1162   }
1163 
1164   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1165   lrow = row - rstart;
1166 
1167   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1168   if (!v)   {pvA = 0; pvB = 0;}
1169   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1170   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1171   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1172   nztot = nzA + nzB;
1173 
1174   cmap  = mat->garray;
1175   if (v  || idx) {
1176     if (nztot) {
1177       /* Sort by increasing column numbers, assuming A and B already sorted */
1178       int imark = -1;
1179       if (v) {
1180         *v = v_p = mat->rowvalues;
1181         for ( i=0; i<nzB; i++ ) {
1182           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1183           else break;
1184         }
1185         imark = i;
1186         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1187         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1188       }
1189       if (idx) {
1190         *idx = idx_p = mat->rowindices;
1191         if (imark > -1) {
1192           for ( i=0; i<imark; i++ ) {
1193             idx_p[i] = cmap[cworkB[i]];
1194           }
1195         } else {
1196           for ( i=0; i<nzB; i++ ) {
1197             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1198             else break;
1199           }
1200           imark = i;
1201         }
1202         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1203         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1204       }
1205     }
1206     else {
1207       if (idx) *idx = 0;
1208       if (v)   *v   = 0;
1209     }
1210   }
1211   *nz = nztot;
1212   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1213   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
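/*
   An example of the merge above (illustrative only): suppose cstart =
   10, the diagonal block returns local columns {0, 3}, and the
   off-diagonal block returns compressed columns whose global numbers
   cmap[cworkB[i]] are {2, 7, 15}.  Then imark = 2 and the returned row
   is

       idx = { 2, 7, 10+0, 10+3, 15 }

   i.e. the B-part columns below cstart, then the A part shifted by
   cstart, then the remaining B-part columns; the result is globally
   sorted because each part is already sorted.
*/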
1216 
1217 #undef __FUNC__
1218 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1219 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1220 {
1221   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1222 
1223   PetscFunctionBegin;
1224   if (aij->getrowactive == PETSC_FALSE) {
1225     SETERRQ(1,0,"MatGetRow not called");
1226   }
1227   aij->getrowactive = PETSC_FALSE;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNC__
1232 #define __FUNC__ "MatNorm_MPIAIJ"
1233 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1234 {
1235   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1236   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1237   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1238   double     sum = 0.0;
1239   Scalar     *v;
1240 
1241   PetscFunctionBegin;
1242   if (aij->size == 1) {
1243     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1244   } else {
1245     if (type == NORM_FROBENIUS) {
1246       v = amat->a;
1247       for (i=0; i<amat->nz; i++ ) {
1248 #if defined(USE_PETSC_COMPLEX)
1249         sum += real(conj(*v)*(*v)); v++;
1250 #else
1251         sum += (*v)*(*v); v++;
1252 #endif
1253       }
1254       v = bmat->a;
1255       for (i=0; i<bmat->nz; i++ ) {
1256 #if defined(USE_PETSC_COMPLEX)
1257         sum += real(conj(*v)*(*v)); v++;
1258 #else
1259         sum += (*v)*(*v); v++;
1260 #endif
1261       }
1262       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1263       *norm = sqrt(*norm);
1264     } else if (type == NORM_1) { /* max column norm */
1265       double *tmp, *tmp2;
1266       int    *jj, *garray = aij->garray;
1267       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1268       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1269       PetscMemzero(tmp,aij->N*sizeof(double));
1270       *norm = 0.0;
1271       v = amat->a; jj = amat->j;
1272       for ( j=0; j<amat->nz; j++ ) {
1273         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1274       }
1275       v = bmat->a; jj = bmat->j;
1276       for ( j=0; j<bmat->nz; j++ ) {
1277         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1278       }
1279       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1280       for ( j=0; j<aij->N; j++ ) {
1281         if (tmp2[j] > *norm) *norm = tmp2[j];
1282       }
1283       PetscFree(tmp); PetscFree(tmp2);
1284     } else if (type == NORM_INFINITY) { /* max row norm */
1285       double ntemp = 0.0;
1286       for ( j=0; j<amat->m; j++ ) {
1287         v = amat->a + amat->i[j] + shift;
1288         sum = 0.0;
1289         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1290           sum += PetscAbsScalar(*v); v++;
1291         }
1292         v = bmat->a + bmat->i[j] + shift;
1293         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1294           sum += PetscAbsScalar(*v); v++;
1295         }
1296         if (sum > ntemp) ntemp = sum;
1297       }
1298       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1299     } else {
1300       SETERRQ(1,0,"No support for two norm");
1301     }
1302   }
1303   PetscFunctionReturn(0);
1304 }
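/*
   For reference, the reductions above compute (a_ij ranging over both
   local blocks):

       NORM_FROBENIUS:  ||A||_F   = sqrt(MPI_SUM of sum_ij |a_ij|^2)
       NORM_1:          ||A||_1   = max_j sum_i |a_ij|   (largest column
                                    sum; columns accumulated via MPI_SUM)
       NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|   (largest row
                                    sum; reduced with MPI_MAX)
*/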
1305 
1306 #undef __FUNC__
1307 #define __FUNC__ "MatTranspose_MPIAIJ"
1308 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1309 {
1310   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1311   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1312   int        ierr,shift = Aloc->indexshift;
1313   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1314   Mat        B;
1315   Scalar     *array;
1316 
1317   PetscFunctionBegin;
1318   if (matout == PETSC_NULL && M != N) {
1319     SETERRQ(1,0,"Square matrix only for in-place");
1320   }
1321 
1322   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1323 
1324   /* copy over the A part */
1325   Aloc = (Mat_SeqAIJ*) a->A->data;
1326   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1327   row = a->rstart;
1328   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1329   for ( i=0; i<m; i++ ) {
1330     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1331     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1332   }
1333   aj = Aloc->j;
1334   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1335 
1336   /* copy over the B part */
1337   Aloc = (Mat_SeqAIJ*) a->B->data;
1338   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1339   row = a->rstart;
1340   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1341   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1342   for ( i=0; i<m; i++ ) {
1343     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1344     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1345   }
1346   PetscFree(ct);
1347   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1348   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1349   if (matout != PETSC_NULL) {
1350     *matout = B;
1351   } else {
1352     /* This isn't really an in-place transpose, but we free the data structures of a */
1353     PetscFree(a->rowners);
1354     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1355     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1356     if (a->colmap) PetscFree(a->colmap);
1357     if (a->garray) PetscFree(a->garray);
1358     if (a->lvec) VecDestroy(a->lvec);
1359     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1360     PetscFree(a);
1361     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1362     PetscHeaderDestroy(B);
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNC__
1368 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1369 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1370 {
1371   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1372   Mat a = aij->A, b = aij->B;
1373   int ierr,s1,s2,s3;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1377   if (rr) {
1378     s3 = aij->n;
1379     VecGetLocalSize_Fast(rr,s1);
1380     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1381     /* Overlap communication with computation. */
1382     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1383   }
1384   if (ll) {
1385     VecGetLocalSize_Fast(ll,s1);
1386     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1387     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1388   }
1389   /* scale  the diagonal block */
1390   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1391 
1392   if (rr) {
1393     /* Do a scatter end and then right scale the off-diagonal block */
1394     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1395     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1396   }
1397 
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 
1402 extern int MatPrintHelp_SeqAIJ(Mat);
1403 #undef __FUNC__
1404 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1405 int MatPrintHelp_MPIAIJ(Mat A)
1406 {
1407   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1408   int        ierr;
1409 
1410   PetscFunctionBegin;
1411   if (!a->rank) {
1412     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNC__
1418 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1419 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1420 {
1421   PetscFunctionBegin;
1422   *bs = 1;
1423   PetscFunctionReturn(0);
1424 }
1425 #undef __FUNC__
1426 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1427 int MatSetUnfactored_MPIAIJ(Mat A)
1428 {
1429   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1430   int        ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNC__
1438 #define __FUNC__ "MatEqual_MPIAIJ"
1439 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1440 {
1441   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1442   Mat        a, b, c, d;
1443   PetscTruth flg;
1444   int        ierr;
1445 
1446   PetscFunctionBegin;
1447   if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
1448   a = matA->A; b = matA->B;
1449   c = matB->A; d = matB->B;
1450 
1451   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1452   if (flg == PETSC_TRUE) {
1453     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1454   }
1455   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
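
/*
   Usage sketch (illustrative only): the comparison is collective and, via
   the MPI_Allreduce() above, returns the same PetscTruth on every
   processor of the communicator.

     PetscTruth flg;
     ierr = MatEqual(A,B,&flg); CHKERRQ(ierr);
     if (flg == PETSC_TRUE) PetscPrintf(PETSC_COMM_WORLD,"matrices are equal\n");
*/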
1458 
1459 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1460 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1461 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1462 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1463 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);
1464 
1465 /* -------------------------------------------------------------------*/
1466 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1467        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1468        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1469        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1470        0,0,
1471        0,0,
1472        0,0,
1473        MatRelax_MPIAIJ,
1474        MatTranspose_MPIAIJ,
1475        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1476        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1477        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1478        0,
1479        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1480        0,0,0,0,
1481        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1482        0,0,
1483        0,0,MatConvertSameType_MPIAIJ,0,0,
1484        0,0,0,
1485        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1486        MatPrintHelp_MPIAIJ,
1487        MatScale_MPIAIJ,0,0,0,
1488        MatGetBlockSize_MPIAIJ,0,0,0,0,
1489        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1490        0,0,MatGetSubMatrix_MPIAIJ};
1491 
1492 
1493 #undef __FUNC__
1494 #define __FUNC__ "MatCreateMPIAIJ"
1495 /*@C
1496    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1497    (the default parallel PETSc format).  For good matrix assembly performance
1498    the user should preallocate the matrix storage by setting the parameters
1499    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1500    performance can be increased by more than a factor of 50.
1501 
1502    Input Parameters:
1503 .  comm - MPI communicator
1504 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1505            This value should be the same as the local size used in creating the
1506            y vector for the matrix-vector product y = Ax.
1507 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1508        This value should be the same as the local size used in creating the
1509        x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
1510 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1511 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1512 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1513            (same for all local rows)
1514 .  d_nnz - array containing the number of nonzeros in the various rows of the
1515            diagonal portion of the local submatrix (possibly different for each row)
1516            or PETSC_NULL. You must leave room for the diagonal entry even if
1517            it is zero.
1518 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1519            submatrix (same for all local rows).
1520 .  o_nnz - array containing the number of nonzeros in the various rows of the
1521            off-diagonal portion of the local submatrix (possibly different for
1522            each row) or PETSC_NULL.
1523 
1524    Output Parameter:
1525 .  A - the matrix
1526 
1527    Notes:
1528    The AIJ format (also called the Yale sparse matrix format or
1529    compressed row storage), is fully compatible with standard Fortran 77
1530    storage.  That is, the stored row and column indices can begin at
1531    either one (as in Fortran) or zero.  See the users manual for details.
1532 
1533    The user MUST specify either the local or global matrix dimensions
1534    (possibly both).
1535 
1536    By default, this format uses inodes (identical nodes) when possible.
1537    We search for consecutive rows with the same nonzero structure, thereby
1538    reusing matrix information to achieve increased efficiency.
1539 
1540    Options Database Keys:
1541 $    -mat_aij_no_inode  - Do not use inodes
1542 $    -mat_aij_inode_limit <limit> - Set inode limit.
1543 $        (max limit=5)
1544 $    -mat_aij_oneindex - Internally use indexing starting at 1
1545 $        rather than 0.  Note: When calling MatSetValues(),
1546 $        the user still MUST index entries starting at 0!
1547 
1548    Storage Information:
1549    For a square global matrix we define each processor's diagonal portion
1550    to be its local rows and the corresponding columns (a square submatrix);
1551    each processor's off-diagonal portion encompasses the remainder of the
1552    local matrix (a rectangular submatrix).
1553 
1554    The user can specify preallocated storage for the diagonal part of
1555    the local submatrix with either d_nz or d_nnz (not both).  Set
1556    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1557    memory allocation.  Likewise, specify preallocated storage for the
1558    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1559 
1560    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1561    the figure below we depict these three local rows and all columns (0-11).
1562 
1563 $          0 1 2 3 4 5 6 7 8 9 10 11
1564 $         -------------------
1565 $  row 3  |  o o o d d d o o o o o o
1566 $  row 4  |  o o o d d d o o o o o o
1567 $  row 5  |  o o o d d d o o o o o o
1568 $         -------------------
1569 $
1570 
1571    Thus, any entries in the d locations are stored in the d (diagonal)
1572    submatrix, and any entries in the o locations are stored in the
1573    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1574    stored simply in the MATSEQAIJ format for compressed row storage.
1575 
1576    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1577    and o_nz should indicate the number of nonzeros per row in the o matrix.
1578    In general, for PDE problems in which most nonzeros are near the diagonal,
1579    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1580    or you will get TERRIBLE performance; see the users' manual chapter on
1581    matrices.
1582 
1583 .keywords: matrix, aij, compressed row, sparse, parallel
1584 
1585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1586 @*/
1587 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1588                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1589 {
1590   Mat          B;
1591   Mat_MPIAIJ   *b;
1592   int          ierr, i,sum[2],work[2],size;
1593 
1594   PetscFunctionBegin;
1595   MPI_Comm_size(comm,&size);
1596   if (size == 1) {
1597     if (M == PETSC_DECIDE) M = m;
1598     if (N == PETSC_DECIDE) N = n;
1599     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1600     PetscFunctionReturn(0);
1601   }
1602 
1603   *A = 0;
1604   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1605   PLogObjectCreate(B);
1606   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1607   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1608   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1609   B->destroy    = MatDestroy_MPIAIJ;
1610   B->view       = MatView_MPIAIJ;
1611   B->factor     = 0;
1612   B->assembled  = PETSC_FALSE;
1613   B->mapping    = 0;
1614 
1615   B->insertmode = NOT_SET_VALUES;
1616   b->size       = size;
1617   MPI_Comm_rank(comm,&b->rank);
1618 
1619   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1620     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1621   }
1622 
1623   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1624     work[0] = m; work[1] = n;
1625     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1626     if (M == PETSC_DECIDE) M = sum[0];
1627     if (N == PETSC_DECIDE) N = sum[1];
1628   }
1629   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1630   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
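  /* e.g., M = 10 on b->size = 3 processors: M/size = 3 and M%size = 1, so
     rank 0 gets 3+1 = 4 rows while ranks 1 and 2 get 3 rows each
     (4+3+3 = 10); columns are split the same way */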
1631   b->m = m; B->m = m;
1632   b->n = n; B->n = n;
1633   b->N = N; B->N = N;
1634   b->M = M; B->M = M;
1635 
1636   /* build local table of row and column ownerships */
1637   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1638   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1639   b->cowners = b->rowners + b->size + 2;
1640   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1641   b->rowners[0] = 0;
1642   for ( i=2; i<=b->size; i++ ) {
1643     b->rowners[i] += b->rowners[i-1];
1644   }
1645   b->rstart = b->rowners[b->rank];
1646   b->rend   = b->rowners[b->rank+1];
1647   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1648   b->cowners[0] = 0;
1649   for ( i=2; i<=b->size; i++ ) {
1650     b->cowners[i] += b->cowners[i-1];
1651   }
1652   b->cstart = b->cowners[b->rank];
1653   b->cend   = b->cowners[b->rank+1];
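  /* e.g., with local row counts m = {4,3,3} the MPI_Allgather() plus the
     running sum above give rowners = {0,4,7,10}; rank 1 then owns global
     rows rstart = 4 up to (but not including) rend = 7 */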
1654 
1655   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1656   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1657   PLogObjectParent(B,b->A);
1658   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1659   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1660   PLogObjectParent(B,b->B);
1661 
1662   /* build cache for off array entries formed */
1663   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1664   b->donotstash  = 0;
1665   b->colmap      = 0;
1666   b->garray      = 0;
1667   b->roworiented = 1;
1668 
1669   /* stuff used for matrix vector multiply */
1670   b->lvec      = 0;
1671   b->Mvctx     = 0;
1672 
1673   /* stuff for MatGetRow() */
1674   b->rowindices   = 0;
1675   b->rowvalues    = 0;
1676   b->getrowactive = PETSC_FALSE;
1677 
1678   *A = B;
1679   PetscFunctionReturn(0);
1680 }
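
/*
   Usage sketch (illustrative only): for the storage figure in the manual
   page above (assuming 4 processors with 3 local rows each and 12 global
   columns, 3 entries per row in the d block and up to 9 in the o block)
   a suitable preallocation would be

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,12,12,3,PETSC_NULL,
                            9,PETSC_NULL,&A); CHKERRQ(ierr);
     ... MatSetValues() calls ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/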
1681 
1682 #undef __FUNC__
1683 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1684 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1685 {
1686   Mat        mat;
1687   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1688   int        ierr, len=0, flg;
1689 
1690   PetscFunctionBegin;
1691   *newmat       = 0;
1692   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1693   PLogObjectCreate(mat);
1694   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1695   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1696   mat->destroy    = MatDestroy_MPIAIJ;
1697   mat->view       = MatView_MPIAIJ;
1698   mat->factor     = matin->factor;
1699   mat->assembled  = PETSC_TRUE;
1700 
1701   a->m = mat->m   = oldmat->m;
1702   a->n = mat->n   = oldmat->n;
1703   a->M = mat->M   = oldmat->M;
1704   a->N = mat->N   = oldmat->N;
1705 
1706   a->rstart       = oldmat->rstart;
1707   a->rend         = oldmat->rend;
1708   a->cstart       = oldmat->cstart;
1709   a->cend         = oldmat->cend;
1710   a->size         = oldmat->size;
1711   a->rank         = oldmat->rank;
1712   mat->insertmode = NOT_SET_VALUES;
1713   a->rowvalues    = 0;
1714   a->getrowactive = PETSC_FALSE;
1715 
1716   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1717   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1718   a->cowners = a->rowners + a->size + 2;
1719   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1720   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1721   if (oldmat->colmap) {
1722     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1723     PLogObjectMemory(mat,(a->N)*sizeof(int));
1724     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1725   } else a->colmap = 0;
1726   if (oldmat->garray) {
1727     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1728     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1729     PLogObjectMemory(mat,len*sizeof(int));
1730     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1731   } else a->garray = 0;
1732 
1733   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1734   PLogObjectParent(mat,a->lvec);
1735   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1736   PLogObjectParent(mat,a->Mvctx);
1737   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1738   PLogObjectParent(mat,a->A);
1739   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1740   PLogObjectParent(mat,a->B);
1741   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1742   if (flg) {
1743     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1744   }
1745   *newmat = mat;
1746   PetscFunctionReturn(0);
1747 }
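
/*
   Usage sketch (illustrative only): this routine is normally reached via
   MatConvert() with the MATSAME type, which duplicates a matrix in the
   same format:

     Mat Acopy;
     ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);
*/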
1748 
1749 #include "sys.h"
1750 
1751 #undef __FUNC__
1752 #define __FUNC__ "MatLoad_MPIAIJ"
1753 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1754 {
1755   Mat          A;
1756   Scalar       *vals,*svals;
1757   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1758   MPI_Status   status;
1759   int          i, nz, ierr, j,rstart, rend, fd;
1760   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1761   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1762   int          tag = ((PetscObject)viewer)->tag;
1763 
1764   PetscFunctionBegin;
1765   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1766   if (!rank) {
1767     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1768     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1769     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1770     if (header[3] < 0) {
1771       SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ");
1772     }
1773   }
1774 
1775 
1776   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1777   M = header[1]; N = header[2];
1778   /* determine ownership of all rows */
1779   m = M/size + ((M % size) > rank);
1780   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1781   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1782   rowners[0] = 0;
1783   for ( i=2; i<=size; i++ ) {
1784     rowners[i] += rowners[i-1];
1785   }
1786   rstart = rowners[rank];
1787   rend   = rowners[rank+1];
1788 
1789   /* distribute row lengths to all processors */
1790   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1791   offlens = ourlens + (rend-rstart);
1792   if (!rank) {
1793     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1794     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1795     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1796     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1797     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1798     PetscFree(sndcounts);
1799   } else {
1800     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1801   }
1802 
1803   if (!rank) {
1804     /* calculate the number of nonzeros on each processor */
1805     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1806     PetscMemzero(procsnz,size*sizeof(int));
1807     for ( i=0; i<size; i++ ) {
1808       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1809         procsnz[i] += rowlengths[j];
1810       }
1811     }
1812     PetscFree(rowlengths);
1813 
1814     /* determine max buffer needed and allocate it */
1815     maxnz = 0;
1816     for ( i=0; i<size; i++ ) {
1817       maxnz = PetscMax(maxnz,procsnz[i]);
1818     }
1819     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1820 
1821     /* read in my part of the matrix column indices  */
1822     nz     = procsnz[0];
1823     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1824     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1825 
1826     /* read in everyone else's parts and ship them off */
1827     for ( i=1; i<size; i++ ) {
1828       nz   = procsnz[i];
1829       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1830       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1831     }
1832     PetscFree(cols);
1833   } else {
1834     /* determine buffer space needed for message */
1835     nz = 0;
1836     for ( i=0; i<m; i++ ) {
1837       nz += ourlens[i];
1838     }
1839     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1840 
1841     /* receive message of column indices */
1842     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1843     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1844     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1845   }
1846 
1847   /* loop over local rows, determining number of off diagonal entries */
1848   PetscMemzero(offlens,m*sizeof(int));
1849   jj = 0;
1850   for ( i=0; i<m; i++ ) {
1851     for ( j=0; j<ourlens[i]; j++ ) {
1852       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1853       jj++;
1854     }
1855   }
1856 
1857   /* create our matrix */
1858   for ( i=0; i<m; i++ ) {
1859     ourlens[i] -= offlens[i];
1860   }
1861   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1862   A = *newmat;
1863   MatSetOption(A,MAT_COLUMNS_SORTED);
1864   for ( i=0; i<m; i++ ) {
1865     ourlens[i] += offlens[i];
1866   }
1867 
1868   if (!rank) {
1869     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1870 
1871     /* read in my part of the matrix numerical values  */
1872     nz = procsnz[0];
1873     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1874 
1875     /* insert into matrix */
1876     jj      = rstart;
1877     smycols = mycols;
1878     svals   = vals;
1879     for ( i=0; i<m; i++ ) {
1880       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1881       smycols += ourlens[i];
1882       svals   += ourlens[i];
1883       jj++;
1884     }
1885 
1886     /* read in other processors and ship out */
1887     for ( i=1; i<size; i++ ) {
1888       nz   = procsnz[i];
1889       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1890       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1891     }
1892     PetscFree(procsnz);
1893   } else {
1894     /* receive numeric values */
1895     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1896 
1897     /* receive message of values */
1898     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1899     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1900     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1901 
1902     /* insert into matrix */
1903     jj      = rstart;
1904     smycols = mycols;
1905     svals   = vals;
1906     for ( i=0; i<m; i++ ) {
1907       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1908       smycols += ourlens[i];
1909       svals   += ourlens[i];
1910       jj++;
1911     }
1912   }
1913   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1914 
1915   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1916   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
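
/*
   Usage sketch (illustrative only): loading a matrix previously written
   with MatView() onto a binary viewer; the file name is an assumption
   chosen for the example.

     Viewer viewer;
     Mat    A;
     ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/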
1919 
1920 #undef __FUNC__
1921 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1922 /*
1923     Not great since it makes two copies of the submatrix: first a SeqAIJ
1924   in local, and then the end result formed by concatenating the local
1925   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1926 */
1927 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat)
1928 {
1929   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1930   Mat        *local,M;
1931   Scalar     *vwork,*aa;
1932   MPI_Comm   comm = mat->comm;
1933   Mat_SeqAIJ *aij;
1934   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1935 
1936   PetscFunctionBegin;
1937   MPI_Comm_rank(comm,&rank);
1938   MPI_Comm_size(comm,&size);
1939 
1940   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1941 
1942   /*
1943       m - number of local rows
1944       n - number of columns (same on all processors)
1945       rstart - first row in new global matrix generated
1946   */
1947   ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
1948   if (call == MAT_INITIAL_MATRIX) {
1949     aij = (Mat_SeqAIJ *) (*local)->data;
1950     if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
1951     ii  = aij->i;
1952     jj  = aij->j;
1953 
1954     /*
1955         Determine the number of non-zeros in the diagonal and off-diagonal
1956         portions of the matrix in order to do correct preallocation
1957     */
1958 
1959     /* first get start and end of "diagonal" columns */
1960     nlocal = n/size + ((n % size) > rank);
1961     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1962     rstart = rend - nlocal;
1963 
1964     /* next, compute all the lengths */
1965     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
1966     olens = dlens + m;
1967     for ( i=0; i<m; i++ ) {
1968       jend = ii[i+1] - ii[i];
1969       olen = 0;
1970       dlen = 0;
1971       for ( j=0; j<jend; j++ ) {
1972         if ( *jj < rstart || *jj >= rend) olen++;
1973         else dlen++;
1974         jj++;
1975       }
1976       olens[i] = olen;
1977       dlens[i] = dlen;
1978     }
1979     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1980     PetscFree(dlens);
1981   } else {
1982     int ml,nl;
1983 
1984     M = *newmat;
1985     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1986     if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request");
1987     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1988   }
1989   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
1990   aij = (Mat_SeqAIJ *) (*local)->data;
1991   if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
1992   ii  = aij->i;
1993   jj  = aij->j;
1994   aa  = aij->a;
1995   for (i=0; i<m; i++) {
1996     row   = rstart + i;
1997     nz    = ii[i+1] - ii[i];
1998     cwork = jj;     jj += nz;
1999     vwork = aa;     aa += nz;
2000     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2001   }
2002 
2003   ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
2004   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2005   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2006   *newmat = M;
2007   PetscFunctionReturn(0);
2008 }
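
/*
   Usage sketch (illustrative only): each processor supplies the rows and
   columns it wants via index sets; the stride sets below (m local rows
   starting at rstart, and the first n columns) are assumptions chosen
   for the example.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(PETSC_COMM_SELF,m,rstart,1,&isrow); CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscol); CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
     ierr = ISDestroy(isrow); CHKERRQ(ierr);
     ierr = ISDestroy(iscol); CHKERRQ(ierr);
*/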
2009