xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e5b4c0cedeb32a7015a5ef3add4dc6b757c4adfe)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.99 1995/11/09 22:28:54 bsmith Exp curfman $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local column number in the off-diagonal part (B) of the
11 local storage of the matrix.  This is done in a non-scalable way, since
12 the length of colmap equals the global number of columns.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
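/*
   Added note: colmap is indexed by global column number; for each global
   column that appears in the off-diagonal block B it records the
   corresponding local column (offset by the index shift), while columns
   not present in B are left at zero.  MatSetValues_MPIAIJ() and
   MatAssemblyEnd_MPIAIJ() below consult this table to translate global
   column indices into local columns of B.
*/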
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
40                                int *idxn,Scalar *v,InsertMode addv)
41 {
42   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (idxn[j] >= cstart && idxn[j] < cend){
61           col = idxn[j] - cstart;
62           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
63         }
64         else {
65           if (aij->assembled) {
66             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
67             col = aij->colmap[idxn[j]] + shift;
68             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
69               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
70               col =  idxn[j];
71             }
72           }
73           else col = idxn[j];
74           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
75         }
76       }
77     }
78     else {
79       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
80     }
81   }
82   return 0;
83 }
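/*
   Example (an added sketch, not part of the original source): a caller
   typically sets values into its own rows and then assembles; entries
   destined for rows owned by other processors are stashed here and
   communicated during MatAssemblyBegin()/MatAssemblyEnd().  Names such as
   mat, rstart, and ierr are illustrative.

      int    row = rstart, cols[3] = {0, 1, 2};
      Scalar vals[3] = {1.0, 2.0, 3.0};
      ierr = MatSetValues(mat,1,&row,3,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/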
84 
85 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
86 {
87   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
88   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
89   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
90   int        cstart = aij->cstart, cend = aij->cend,row,col;
91   int        shift = C->indexshift;
92 
93   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
94   for ( i=0; i<m; i++ ) {
95     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
96     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
97     if (idxm[i] >= rstart && idxm[i] < rend) {
98       row = idxm[i] - rstart;
99       for ( j=0; j<n; j++ ) {
100         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
101         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
102         if (idxn[j] >= cstart && idxn[j] < cend){
103           col = idxn[j] - cstart;
104           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
105         }
106         else {
107           col = aij->colmap[idxn[j]] + shift;
108           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
109         }
110       }
111     }
112     else {
113       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
114     }
115   }
116   return 0;
117 }
118 
119 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
120 {
121   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
122   MPI_Comm    comm = mat->comm;
123   int         size = aij->size, *owners = aij->rowners;
124   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
125   MPI_Request *send_waits,*recv_waits;
126   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
127   InsertMode  addv;
128   Scalar      *rvalues,*svalues;
129 
130   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
131   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
132   if (addv == (ADD_VALUES|INSERT_VALUES)) {
133     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
134   }
135   aij->insertmode = addv; /* in case this processor had no cache */
136 
137   /*  first count number of contributors to each processor */
138   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
139   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
140   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
141   for ( i=0; i<aij->stash.n; i++ ) {
142     idx = aij->stash.idx[i];
143     for ( j=0; j<size; j++ ) {
144       if (idx >= owners[j] && idx < owners[j+1]) {
145         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
146       }
147     }
148   }
149   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
150 
151   /* inform other processors of number of messages and max length*/
152   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
153   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
154   nreceives = work[rank];
155   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
156   nmax = work[rank];
157   PetscFree(work);
158 
159   /* post receives:
160        1) each message will consist of ordered triples
161      (global row index, global column index, value); the indices are
162      stored as Scalars (doubles) to simplify the message passing.
163        2) since we don't know how long each individual message is, we
164      allocate the largest needed buffer for each receive. Potentially
165      this is a lot of wasted space.
166 
167 
168        This could be done better.
169   */
170   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
171   CHKPTRQ(rvalues);
172   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
173   CHKPTRQ(recv_waits);
174   for ( i=0; i<nreceives; i++ ) {
175     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
176               comm,recv_waits+i);
177   }
178 
179   /* do sends:
180       1) starts[i] gives the starting index in svalues for stuff going to
181          the ith processor
182   */
183   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
184   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
185   CHKPTRQ(send_waits);
186   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
187   starts[0] = 0;
188   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
189   for ( i=0; i<aij->stash.n; i++ ) {
190     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
191     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
192     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
193   }
194   PetscFree(owner);
195   starts[0] = 0;
196   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
197   count = 0;
198   for ( i=0; i<size; i++ ) {
199     if (procs[i]) {
200       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
201                 comm,send_waits+count++);
202     }
203   }
204   PetscFree(starts); PetscFree(nprocs);
205 
206   /* Free cache space */
207   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
208 
209   aij->svalues    = svalues;    aij->rvalues    = rvalues;
210   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
211   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
212   aij->rmax       = nmax;
213 
214   return 0;
215 }
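/*
   Added note: each stashed entry is packed as three consecutive Scalars,
   svalues[3*k] = global row, svalues[3*k+1] = global column, and
   svalues[3*k+2] = value; MatAssemblyEnd_MPIAIJ() below converts the
   first two back to ints on the receiving side.
*/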
216 extern int MatSetUpMultiply_MPIAIJ(Mat);
217 
218 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
219 {
220   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
221   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
222   MPI_Status  *send_status,recv_status;
223   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
224   int         row,col,other_disassembled,shift = C->indexshift;
225   Scalar      *values,val;
226   InsertMode  addv = aij->insertmode;
227 
228   /*  wait on receives */
229   while (count) {
230     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
231     /* unpack receives into our local space */
232     values = aij->rvalues + 3*imdex*aij->rmax;
233     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
234     n = n/3;
235     for ( i=0; i<n; i++ ) {
236       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
237       col = (int) PETSCREAL(values[3*i+1]);
238       val = values[3*i+2];
239       if (col >= aij->cstart && col < aij->cend) {
240         col -= aij->cstart;
241         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
242       }
243       else {
244         if (aij->assembled) {
245           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
246           col = aij->colmap[col] + shift;
247           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
249             col = (int) PETSCREAL(values[3*i+1]);
250           }
251         }
252         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
253       }
254     }
255     count--;
256   }
257   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
258 
259   /* wait on sends */
260   if (aij->nsends) {
261     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
262     CHKPTRQ(send_status);
263     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
264     PetscFree(send_status);
265   }
266   PetscFree(aij->send_waits); PetscFree(aij->svalues);
267 
268   aij->insertmode = NOT_SET_VALUES;
269   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
270   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
271 
272   /* determine if any processor has disassembled; if so, we must
273      also disassemble ourselves so that we can reassemble. */
274   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
275   if (aij->assembled && !other_disassembled) {
276     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
277   }
278 
279   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
280     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
281     aij->assembled = 1;
282   }
283   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
284   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
285 
286   return 0;
287 }
288 
289 static int MatZeroEntries_MPIAIJ(Mat A)
290 {
291   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
292   int        ierr;
293   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
294   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
295   return 0;
296 }
297 
298 /* the code does not handle the diagonal entries correctly unless the
299    matrix is square and the row and column ownerships are identical.
300    This is a BUG. The only way to fix it seems to be to access
301    aij->A and aij->B directly rather than through the MatZeroRows()
302    routine.
303 */
304 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
305 {
306   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
307   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
308   int            *procs,*nprocs,j,found,idx,nsends,*work;
309   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
310   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
311   int            *lens,imdex,*lrows,*values;
312   MPI_Comm       comm = A->comm;
313   MPI_Request    *send_waits,*recv_waits;
314   MPI_Status     recv_status,*send_status;
315   IS             istmp;
316 
317   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
318   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
319   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
320 
321   /*  first count number of contributors to each processor */
322   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
323   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
324   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
325   for ( i=0; i<N; i++ ) {
326     idx = rows[i];
327     found = 0;
328     for ( j=0; j<size; j++ ) {
329       if (idx >= owners[j] && idx < owners[j+1]) {
330         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
331       }
332     }
333     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
334   }
335   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
336 
337   /* inform other processors of number of messages and max length*/
338   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
339   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
340   nrecvs = work[rank];
341   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
342   nmax = work[rank];
343   PetscFree(work);
344 
345   /* post receives:   */
346   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
347   CHKPTRQ(rvalues);
348   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
349   CHKPTRQ(recv_waits);
350   for ( i=0; i<nrecvs; i++ ) {
351     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
352   }
353 
354   /* do sends:
355       1) starts[i] gives the starting index in svalues for stuff going to
356          the ith processor
357   */
358   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
359   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
360   CHKPTRQ(send_waits);
361   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
362   starts[0] = 0;
363   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
364   for ( i=0; i<N; i++ ) {
365     svalues[starts[owner[i]]++] = rows[i];
366   }
367   ISRestoreIndices(is,&rows);
368 
369   starts[0] = 0;
370   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
371   count = 0;
372   for ( i=0; i<size; i++ ) {
373     if (procs[i]) {
374       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
375     }
376   }
377   PetscFree(starts);
378 
379   base = owners[rank];
380 
381   /*  wait on receives */
382   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
383   source = lens + nrecvs;
384   count  = nrecvs; slen = 0;
385   while (count) {
386     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
387     /* unpack receives into our local space */
388     MPI_Get_count(&recv_status,MPI_INT,&n);
389     source[imdex]  = recv_status.MPI_SOURCE;
390     lens[imdex]  = n;
391     slen += n;
392     count--;
393   }
394   PetscFree(recv_waits);
395 
396   /* move the data into the send scatter */
397   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
398   count = 0;
399   for ( i=0; i<nrecvs; i++ ) {
400     values = rvalues + i*nmax;
401     for ( j=0; j<lens[i]; j++ ) {
402       lrows[count++] = values[j] - base;
403     }
404   }
405   PetscFree(rvalues); PetscFree(lens);
406   PetscFree(owner); PetscFree(nprocs);
407 
408   /* actually zap the local rows */
409   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
410   PLogObjectParent(A,istmp);
411   PetscFree(lrows);
412   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
413   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
414   ierr = ISDestroy(istmp); CHKERRQ(ierr);
415 
416   /* wait on sends */
417   if (nsends) {
418     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
419     CHKPTRQ(send_status);
420     MPI_Waitall(nsends,send_waits,send_status);
421     PetscFree(send_status);
422   }
423   PetscFree(send_waits); PetscFree(svalues);
424 
425   return 0;
426 }
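/*
   Example (an added sketch, not part of the original source): zero two
   locally owned rows and place 1.0 on their diagonals; each processor may
   pass a different set of rows.  Names such as mat and rstart are
   illustrative.

      int    rows[2] = {rstart, rstart+1};
      Scalar one = 1.0;
      IS     is;
      ierr = ISCreateSeq(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
      ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
      ierr = ISDestroy(is); CHKERRQ(ierr);
*/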
427 
428 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
429 {
430   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
431   int        ierr;
432 
433   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
434   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
435   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
436   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
437   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
438   return 0;
439 }
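/*
   Added note: MatMult_MPIAIJ above (and MatMultAdd_MPIAIJ below) overlap
   communication with computation: the scatter of off-processor vector
   entries into a->lvec is started, the diagonal block a->A is applied to
   the local part of xx, and only then is the scatter completed and the
   off-diagonal block a->B applied to the gathered ghost values.
*/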
440 
441 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
442 {
443   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
444   int        ierr;
445   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
446   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
448   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
449   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
450   return 0;
451 }
452 
453 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
454 {
455   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
456   int        ierr;
457 
458   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
459   /* do nondiagonal part */
460   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
461   /* send it on its way */
462   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
463                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
464   /* do local part */
465   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
466   /* receive remote parts: note this assumes the values are not actually */
467   /* inserted into yy until the next line, which is true for this */
468   /* implementation but may not always be true. */
469   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
470                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
471   return 0;
472 }
473 
474 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
475 {
476   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478 
479   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
480   /* do nondiagonal part */
481   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
482   /* send it on its way */
483   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
484                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
485   /* do local part */
486   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
487   /* receive remote parts: note this assumes the values are not actually */
488   /* inserted into zz until the next line, which is true for this */
489   /* implementation but may not always be true. */
490   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
491                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
492   return 0;
493 }
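/*
   Added note: the transpose products above reverse the communication
   pattern: B^T applied to the local part of xx is computed into a->lvec
   and scattered back with ADD_VALUES (a reverse scatter), while A^T is
   applied locally; the result is the sum of both contributions.
*/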
494 
495 /*
496   This only works correctly for square matrices where the subblock a->A is the
497    diagonal block.
498 */
499 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
500 {
501   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
502   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
503   return MatGetDiagonal(a->A,v);
504 }
505 
506 static int MatDestroy_MPIAIJ(PetscObject obj)
507 {
508   Mat        mat = (Mat) obj;
509   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
510   int        ierr;
511 #if defined(PETSC_LOG)
512   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
513 #endif
514   PetscFree(aij->rowners);
515   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
516   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
517   if (aij->colmap) PetscFree(aij->colmap);
518   if (aij->garray) PetscFree(aij->garray);
519   if (aij->lvec)   VecDestroy(aij->lvec);
520   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
521   PetscFree(aij);
522   PLogObjectDestroy(mat);
523   PetscHeaderDestroy(mat);
524   return 0;
525 }
526 #include "draw.h"
527 #include "pinclude/pviewer.h"
528 
529 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
530 {
531   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
532   int         ierr;
533 
534   if (aij->size == 1) {
535     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
536   }
537   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
538   return 0;
539 }
540 
541 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
542 {
543   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
545   int         ierr, format,shift = C->indexshift,rank;
546   PetscObject vobj = (PetscObject) viewer;
547   FILE        *fd;
548 
549   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
550     ierr = ViewerFileGetFormat_Private(viewer,&format);
551     if (format == FILE_FORMAT_INFO_DETAILED) {
552       int nz,nzalloc,mem;
553       MPI_Comm_rank(mat->comm,&rank);
554       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
555       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
556       MPIU_Seq_begin(mat->comm,1);
557       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
558                 nzalloc,mem);
559       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
560       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
561       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
562       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
563       fflush(fd);
564       MPIU_Seq_end(mat->comm,1);
565       ierr = VecScatterView(aij->Mvctx,viewer);
566       return 0;
567     }
568     else if (format == FILE_FORMAT_INFO) {
569       return 0;
570     }
571   }
572 
573   if (vobj->type == ASCII_FILE_VIEWER) {
574     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
575     MPIU_Seq_begin(mat->comm,1);
576     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
577            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
578            aij->cend);
579     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
580     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
581     fflush(fd);
582     MPIU_Seq_end(mat->comm,1);
583   }
584   else {
585     int size = aij->size;
586     rank = aij->rank;
587     if (size == 1) {
588       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
589     }
590     else {
591       /* assemble the entire matrix onto first processor. */
592       Mat         A;
593       Mat_SeqAIJ *Aloc;
594       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
595       Scalar      *a;
596 
597       if (!rank) {
598         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
599       }
600       else {
601         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
602       }
603       PLogObjectParent(mat,A);
604 
605 
606       /* copy over the A part */
607       Aloc = (Mat_SeqAIJ*) aij->A->data;
608       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
609       row = aij->rstart;
610       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
611       for ( i=0; i<m; i++ ) {
612         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
613         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
614       }
615       aj = Aloc->j;
616       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
617 
618       /* copy over the B part */
619       Aloc = (Mat_SeqAIJ*) aij->B->data;
620       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
621       row = aij->rstart;
622       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
623       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
624       for ( i=0; i<m; i++ ) {
625         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
626         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
627       }
628       PetscFree(ct);
629       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
630       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
631       if (!rank) {
632         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
633       }
634       ierr = MatDestroy(A); CHKERRQ(ierr);
635     }
636   }
637   return 0;
638 }
639 
640 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
641 {
642   Mat         mat = (Mat) obj;
643   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
644   int         ierr;
645   PetscObject vobj = (PetscObject) viewer;
646 
647   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
648   if (!viewer) {
649     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
650   }
651   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
652   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
653     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
654   }
655   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
656     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
657   }
658   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
659     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
660   }
661   else if (vobj->cookie == DRAW_COOKIE) {
662     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
663   }
664   else if (vobj->type == BINARY_FILE_VIEWER) {
665     return MatView_MPIAIJ_Binary(mat,viewer);
666   }
667   return 0;
668 }
669 
670 extern int MatMarkDiag_SeqAIJ(Mat);
671 /*
672     This has to provide several versions.
673 
674      1) as in the sequential case
675      2) a) use only local smoothing, updating outer values only once
676         b) local smoothing, updating outer values each inner iteration
677      3) colored sweeps, updating outer values between colors
678 */
679 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
680                            double fshift,int its,Vec xx)
681 {
682   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
683   Mat        AA = mat->A, BB = mat->B;
684   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
685   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
686   int        ierr,*idx, *diag;
687   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
688   Vec        tt;
689 
690   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
691 
692   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
693   xs = x + shift; /* adjust pointer for 1-based (Fortran) index storage */
694   ls = ls + shift;
695   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
696   diag = A->diag;
697   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
698     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
699   }
700   if (flag & SOR_EISENSTAT) {
701     /* Let  A = L + U + D, where L is lower triangular,
702     U is upper triangular, and D is diagonal.  This routine applies
703 
704             (L + E)^{-1} A (U + E)^{-1}
705 
706     to a vector efficiently using Eisenstat's trick. This is for
707     the case of SSOR preconditioner, so E is D/omega where omega
708     is the relaxation factor.
709     */
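    /*
       Added note: with E = D/omega the code below realizes
           (L + E)^{-1} A (U + E)^{-1} b
       in four steps, using A = (E + L) + (D - 2E) + (E + U):
           x = (E + U)^{-1} b          (backward triangular sweep)
           t = b - (2E - D) x          (diagonal scaling only)
           t = (E + L)^{-1} t          (forward triangular sweep)
           x = x + t
       so only two triangular sweeps and one diagonal scaling are needed.
    */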
710     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
711     PLogObjectParent(matin,tt);
712     VecGetArray(tt,&t);
713     scale = (2.0/omega) - 1.0;
714     /*  x = (E + U)^{-1} b */
715     VecSet(&zero,mat->lvec);
716     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
717                               mat->Mvctx); CHKERRQ(ierr);
718     for ( i=m-1; i>-1; i-- ) {
719       n    = A->i[i+1] - diag[i] - 1;
720       idx  = A->j + diag[i] + !shift;
721       v    = A->a + diag[i] + !shift;
722       sum  = b[i];
723       SPARSEDENSEMDOT(sum,xs,v,idx,n);
724       d    = fshift + A->a[diag[i]+shift];
725       n    = B->i[i+1] - B->i[i];
726       idx  = B->j + B->i[i] + shift;
727       v    = B->a + B->i[i] + shift;
728       SPARSEDENSEMDOT(sum,ls,v,idx,n);
729       x[i] = omega*(sum/d);
730     }
731     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
732                             mat->Mvctx); CHKERRQ(ierr);
733 
734     /*  t = b - (2*E - D)x */
735     v = A->a;
736     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
737 
738     /*  t = (E + L)^{-1}t */
739     ts = t + shift; /* adjust pointer for 1-based index storage of A->a and A->j */
740     diag = A->diag;
741     VecSet(&zero,mat->lvec);
742     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
743                                                  mat->Mvctx); CHKERRQ(ierr);
744     for ( i=0; i<m; i++ ) {
745       n    = diag[i] - A->i[i];
746       idx  = A->j + A->i[i] + shift;
747       v    = A->a + A->i[i] + shift;
748       sum  = t[i];
749       SPARSEDENSEMDOT(sum,ts,v,idx,n);
750       d    = fshift + A->a[diag[i]+shift];
751       n    = B->i[i+1] - B->i[i];
752       idx  = B->j + B->i[i] + shift;
753       v    = B->a + B->i[i] + shift;
754       SPARSEDENSEMDOT(sum,ls,v,idx,n);
755       t[i] = omega*(sum/d);
756     }
757     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
758                                                     mat->Mvctx); CHKERRQ(ierr);
759     /*  x = x + t */
760     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
761     VecDestroy(tt);
762     return 0;
763   }
764 
765 
766   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
767     if (flag & SOR_ZERO_INITIAL_GUESS) {
768       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
769     }
770     else {
771       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
772       CHKERRQ(ierr);
773       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
774       CHKERRQ(ierr);
775     }
776     while (its--) {
777       /* go down through the rows */
778       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
779                               mat->Mvctx); CHKERRQ(ierr);
780       for ( i=0; i<m; i++ ) {
781         n    = A->i[i+1] - A->i[i];
782         idx  = A->j + A->i[i] + shift;
783         v    = A->a + A->i[i] + shift;
784         sum  = b[i];
785         SPARSEDENSEMDOT(sum,xs,v,idx,n);
786         d    = fshift + A->a[diag[i]+shift];
787         n    = B->i[i+1] - B->i[i];
788         idx  = B->j + B->i[i] + shift;
789         v    = B->a + B->i[i] + shift;
790         SPARSEDENSEMDOT(sum,ls,v,idx,n);
791         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
792       }
793       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
794                             mat->Mvctx); CHKERRQ(ierr);
795       /* come up through the rows */
796       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
797                               mat->Mvctx); CHKERRQ(ierr);
798       for ( i=m-1; i>-1; i-- ) {
799         n    = A->i[i+1] - A->i[i];
800         idx  = A->j + A->i[i] + shift;
801         v    = A->a + A->i[i] + shift;
802         sum  = b[i];
803         SPARSEDENSEMDOT(sum,xs,v,idx,n);
804         d    = fshift + A->a[diag[i]+shift];
805         n    = B->i[i+1] - B->i[i];
806         idx  = B->j + B->i[i] + shift;
807         v    = B->a + B->i[i] + shift;
808         SPARSEDENSEMDOT(sum,ls,v,idx,n);
809         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
810       }
811       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
812                             mat->Mvctx); CHKERRQ(ierr);
813     }
814   }
815   else if (flag & SOR_FORWARD_SWEEP){
816     if (flag & SOR_ZERO_INITIAL_GUESS) {
817       VecSet(&zero,mat->lvec);
818       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
819                               mat->Mvctx); CHKERRQ(ierr);
820       for ( i=0; i<m; i++ ) {
821         n    = diag[i] - A->i[i];
822         idx  = A->j + A->i[i] + shift;
823         v    = A->a + A->i[i] + shift;
824         sum  = b[i];
825         SPARSEDENSEMDOT(sum,xs,v,idx,n);
826         d    = fshift + A->a[diag[i]+shift];
827         n    = B->i[i+1] - B->i[i];
828         idx  = B->j + B->i[i] + shift;
829         v    = B->a + B->i[i] + shift;
830         SPARSEDENSEMDOT(sum,ls,v,idx,n);
831         x[i] = omega*(sum/d);
832       }
833       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
834                             mat->Mvctx); CHKERRQ(ierr);
835       its--;
836     }
837     while (its--) {
838       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
839       CHKERRQ(ierr);
840       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
841       CHKERRQ(ierr);
842       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
843                               mat->Mvctx); CHKERRQ(ierr);
844       for ( i=0; i<m; i++ ) {
845         n    = A->i[i+1] - A->i[i];
846         idx  = A->j + A->i[i] + shift;
847         v    = A->a + A->i[i] + shift;
848         sum  = b[i];
849         SPARSEDENSEMDOT(sum,xs,v,idx,n);
850         d    = fshift + A->a[diag[i]+shift];
851         n    = B->i[i+1] - B->i[i];
852         idx  = B->j + B->i[i] + shift;
853         v    = B->a + B->i[i] + shift;
854         SPARSEDENSEMDOT(sum,ls,v,idx,n);
855         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
856       }
857       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
858                             mat->Mvctx); CHKERRQ(ierr);
859     }
860   }
861   else if (flag & SOR_BACKWARD_SWEEP){
862     if (flag & SOR_ZERO_INITIAL_GUESS) {
863       VecSet(&zero,mat->lvec);
864       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
865                               mat->Mvctx); CHKERRQ(ierr);
866       for ( i=m-1; i>-1; i-- ) {
867         n    = A->i[i+1] - diag[i] - 1;
868         idx  = A->j + diag[i] + !shift;
869         v    = A->a + diag[i] + !shift;
870         sum  = b[i];
871         SPARSEDENSEMDOT(sum,xs,v,idx,n);
872         d    = fshift + A->a[diag[i]+shift];
873         n    = B->i[i+1] - B->i[i];
874         idx  = B->j + B->i[i] + shift;
875         v    = B->a + B->i[i] + shift;
876         SPARSEDENSEMDOT(sum,ls,v,idx,n);
877         x[i] = omega*(sum/d);
878       }
879       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
880                             mat->Mvctx); CHKERRQ(ierr);
881       its--;
882     }
883     while (its--) {
884       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
885                             mat->Mvctx); CHKERRQ(ierr);
886       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
887                             mat->Mvctx); CHKERRQ(ierr);
888       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
889                               mat->Mvctx); CHKERRQ(ierr);
890       for ( i=m-1; i>-1; i-- ) {
891         n    = A->i[i+1] - A->i[i];
892         idx  = A->j + A->i[i] + shift;
893         v    = A->a + A->i[i] + shift;
894         sum  = b[i];
895         SPARSEDENSEMDOT(sum,xs,v,idx,n);
896         d    = fshift + A->a[diag[i]+shift];
897         n    = B->i[i+1] - B->i[i];
898         idx  = B->j + B->i[i] + shift;
899         v    = B->a + B->i[i] + shift;
900         SPARSEDENSEMDOT(sum,ls,v,idx,n);
901         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
902       }
903       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
904                             mat->Mvctx); CHKERRQ(ierr);
905     }
906   }
907   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
908     if (flag & SOR_ZERO_INITIAL_GUESS) {
909       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
910     }
911     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
912     CHKERRQ(ierr);
913     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
914     CHKERRQ(ierr);
915     while (its--) {
916       /* go down through the rows */
917       for ( i=0; i<m; i++ ) {
918         n    = A->i[i+1] - A->i[i];
919         idx  = A->j + A->i[i] + shift;
920         v    = A->a + A->i[i] + shift;
921         sum  = b[i];
922         SPARSEDENSEMDOT(sum,xs,v,idx,n);
923         d    = fshift + A->a[diag[i]+shift];
924         n    = B->i[i+1] - B->i[i];
925         idx  = B->j + B->i[i] + shift;
926         v    = B->a + B->i[i] + shift;
927         SPARSEDENSEMDOT(sum,ls,v,idx,n);
928         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
929       }
930       /* come up through the rows */
931       for ( i=m-1; i>-1; i-- ) {
932         n    = A->i[i+1] - A->i[i];
933         idx  = A->j + A->i[i] + shift;
934         v    = A->a + A->i[i] + shift;
935         sum  = b[i];
936         SPARSEDENSEMDOT(sum,xs,v,idx,n);
937         d    = fshift + A->a[diag[i]+shift];
938         n    = B->i[i+1] - B->i[i];
939         idx  = B->j + B->i[i] + shift;
940         v    = B->a + B->i[i] + shift;
941         SPARSEDENSEMDOT(sum,ls,v,idx,n);
942         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
943       }
944     }
945   }
946   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
947     if (flag & SOR_ZERO_INITIAL_GUESS) {
948       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
949     }
950     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
951     CHKERRQ(ierr);
952     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
953     CHKERRQ(ierr);
954     while (its--) {
955       for ( i=0; i<m; i++ ) {
956         n    = A->i[i+1] - A->i[i];
957         idx  = A->j + A->i[i] + shift;
958         v    = A->a + A->i[i] + shift;
959         sum  = b[i];
960         SPARSEDENSEMDOT(sum,xs,v,idx,n);
961         d    = fshift + A->a[diag[i]+shift];
962         n    = B->i[i+1] - B->i[i];
963         idx  = B->j + B->i[i] + shift;
964         v    = B->a + B->i[i] + shift;
965         SPARSEDENSEMDOT(sum,ls,v,idx,n);
966         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
967       }
968     }
969   }
970   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
971     if (flag & SOR_ZERO_INITIAL_GUESS) {
972       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
973     }
974     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
975                             mat->Mvctx); CHKERRQ(ierr);
976     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
977                             mat->Mvctx); CHKERRQ(ierr);
978     while (its--) {
979       for ( i=m-1; i>-1; i-- ) {
980         n    = A->i[i+1] - A->i[i];
981         idx  = A->j + A->i[i] + shift;
982         v    = A->a + A->i[i] + shift;
983         sum  = b[i];
984         SPARSEDENSEMDOT(sum,xs,v,idx,n);
985         d    = fshift + A->a[diag[i]+shift];
986         n    = B->i[i+1] - B->i[i];
987         idx  = B->j + B->i[i] + shift;
988         v    = B->a + B->i[i] + shift;
989         SPARSEDENSEMDOT(sum,ls,v,idx,n);
990         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
991       }
992     }
993   }
994   return 0;
995 }
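/*
   Example (an added sketch, not part of the original source): one local
   symmetric SOR sweep with relaxation factor 1.0 and a zero initial
   guess.  Names such as mat, b, and x are illustrative.

      ierr = MatRelax(mat,b,1.0,
               (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),
               0.0,1,x); CHKERRQ(ierr);
*/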
996 
997 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
998                              int *nzalloc,int *mem)
999 {
1000   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1001   Mat        A = mat->A, B = mat->B;
1002   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1003 
1004   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1005   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1006   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1007   if (flag == MAT_LOCAL) {
1008     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1009   } else if (flag == MAT_GLOBAL_MAX) {
1010     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1011     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1012   } else if (flag == MAT_GLOBAL_SUM) {
1013     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1014     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1015   }
1016   return 0;
1017 }
1018 
1019 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1020 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1021 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1022 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1023 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1024 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1025 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1026 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1027 
1028 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1029 {
1030   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1031 
1032   if (op == NO_NEW_NONZERO_LOCATIONS ||
1033       op == YES_NEW_NONZERO_LOCATIONS ||
1034       op == COLUMNS_SORTED ||
1035       op == ROW_ORIENTED) {
1036         MatSetOption(a->A,op);
1037         MatSetOption(a->B,op);
1038   }
1039   else if (op == ROWS_SORTED ||
1040            op == SYMMETRIC_MATRIX ||
1041            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1042            op == YES_NEW_DIAGONALS)
1043     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1044   else if (op == COLUMN_ORIENTED)
1045     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
1046   else if (op == NO_NEW_DIAGONALS)
1047     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1048   else
1049     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1050   return 0;
1051 }
1052 
1053 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1054 {
1055   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1056   *m = mat->M; *n = mat->N;
1057   return 0;
1058 }
1059 
1060 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1061 {
1062   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1063   *m = mat->m; *n = mat->n;
1064   return 0;
1065 }
1066 
1067 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1068 {
1069   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1070   *m = mat->rstart; *n = mat->rend;
1071   return 0;
1072 }
1073 
1074 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1075 {
1076   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1077   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1078   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1079   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1080 
1081   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1082   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
1083   lrow = row - rstart;
1084 
1085   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1086   if (!v)   {pvA = 0; pvB = 0;}
1087   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1088   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1089   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1090   nztot = nzA + nzB;
1091 
1092   if (v  || idx) {
1093     if (nztot) {
1094       /* Sort by increasing column numbers, assuming A and B already sorted */
1095       int imark;
1096       if (mat->assembled) {
1097         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1098       }
1099       if (v) {
1100         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1101         for ( i=0; i<nzB; i++ ) {
1102           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1103           else break;
1104         }
1105         imark = i;
1106         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1107         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1108       }
1109       if (idx) {
1110         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1111         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1112         for ( i=0; i<nzB; i++ ) {
1113           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1114           else break;
1115         }
1116         imark = i;
1117         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1118         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1119       }
1120     }
1121     else {*idx = 0; *v=0;}
1122   }
1123   *nz = nztot;
1124   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1125   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1126   return 0;
1127 }
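/*
   Example (an added sketch, not part of the original source): examine a
   locally owned row and release the work arrays afterwards.  Names such
   as mat and row are illustrative.

      int    nz,*cols;
      Scalar *vals;
      ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
         ... use nz, cols[], and vals[] here ...
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
*/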
1128 
1129 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1130 {
1131   if (idx) PetscFree(*idx);
1132   if (v) PetscFree(*v);
1133   return 0;
1134 }
1135 
1136 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1137 {
1138   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1139   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1140   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1141   double     sum = 0.0;
1142   Scalar     *v;
1143 
1144   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1145   if (aij->size == 1) {
1146     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1147   } else {
1148     if (type == NORM_FROBENIUS) {
1149       v = amat->a;
1150       for (i=0; i<amat->nz; i++ ) {
1151 #if defined(PETSC_COMPLEX)
1152         sum += real(conj(*v)*(*v)); v++;
1153 #else
1154         sum += (*v)*(*v); v++;
1155 #endif
1156       }
1157       v = bmat->a;
1158       for (i=0; i<bmat->nz; i++ ) {
1159 #if defined(PETSC_COMPLEX)
1160         sum += real(conj(*v)*(*v)); v++;
1161 #else
1162         sum += (*v)*(*v); v++;
1163 #endif
1164       }
1165       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1166       *norm = sqrt(*norm);
1167     }
1168     else if (type == NORM_1) { /* max column norm */
1169       double *tmp, *tmp2;
1170       int    *jj, *garray = aij->garray;
1171       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1172       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1173       PetscMemzero(tmp,aij->N*sizeof(double));
1174       *norm = 0.0;
1175       v = amat->a; jj = amat->j;
1176       for ( j=0; j<amat->nz; j++ ) {
1177         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1178       }
1179       v = bmat->a; jj = bmat->j;
1180       for ( j=0; j<bmat->nz; j++ ) {
1181         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1182       }
1183       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1184       for ( j=0; j<aij->N; j++ ) {
1185         if (tmp2[j] > *norm) *norm = tmp2[j];
1186       }
1187       PetscFree(tmp); PetscFree(tmp2);
1188     }
1189     else if (type == NORM_INFINITY) { /* max row norm */
1190       double ntemp = 0.0;
1191       for ( j=0; j<amat->m; j++ ) {
1192         v = amat->a + amat->i[j] + shift;
1193         sum = 0.0;
1194         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1195           sum += PetscAbsScalar(*v); v++;
1196         }
1197         v = bmat->a + bmat->i[j] + shift;
1198         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1199           sum += PetscAbsScalar(*v); v++;
1200         }
1201         if (sum > ntemp) ntemp = sum;
1202       }
1203       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1204     }
1205     else {
1206       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1207     }
1208   }
1209   return 0;
1210 }
1211 
1212 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1213 {
1214   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1215   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1216   int        ierr,shift = Aloc->indexshift;
1217   Mat        B;
1218   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1219   Scalar     *array;
1220 
1221   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1222   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1223   CHKERRQ(ierr);
1224 
1225   /* copy over the A part */
1226   Aloc = (Mat_SeqAIJ*) a->A->data;
1227   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1228   row = a->rstart;
1229   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1230   for ( i=0; i<m; i++ ) {
1231     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1232     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1233   }
1234   aj = Aloc->j;
1235   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1236 
1237   /* copy over the B part */
1238   Aloc = (Mat_SeqAIJ*) a->B->data;
1239   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1240   row = a->rstart;
1241   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1242   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1243   for ( i=0; i<m; i++ ) {
1244     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1245     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1246   }
1247   PetscFree(ct);
1248   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1249   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1250   if (matout) {
1251     *matout = B;
1252   } else {
1253     /* This isn't really an in-place transpose, but we free the data structures from a */
1254     PetscFree(a->rowners);
1255     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1256     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1257     if (a->colmap) PetscFree(a->colmap);
1258     if (a->garray) PetscFree(a->garray);
1259     if (a->lvec) VecDestroy(a->lvec);
1260     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1261     PetscFree(a);
1262     PetscMemcpy(A,B,sizeof(struct _Mat));
1263     PetscHeaderDestroy(B);
1264   }
1265   return 0;
1266 }
1267 
1268 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1269 static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);
1270 
1271 /* -------------------------------------------------------------------*/
1272 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1273        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1274        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1275        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1276        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1277        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1278        MatLUFactor_MPIAIJ,0,
1279        MatRelax_MPIAIJ,
1280        MatTranspose_MPIAIJ,
1281        MatGetInfo_MPIAIJ,0,
1282        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1283        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1284        0,
1285        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1286        MatGetReordering_MPIAIJ,
1287        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1288        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1289        MatILUFactorSymbolic_MPIAIJ,0,
1290        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ,0,0,
1291        0,0,0,
1292        0,0,MatGetValues_MPIAIJ};
1293 
1294 /*@C
1295    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1296    (the default parallel PETSc format).
1297 
1298    Input Parameters:
1299 .  comm - MPI communicator
1300 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1301 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1302            if N is given)
1303 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1304 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1305            if n is given)
1306 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1307            (same for all local rows)
1308 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1309            or null (possibly different for each row).  You must leave room
1310            for the diagonal entry even if it is zero.
1311 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1312            submatrix (same for all local rows).
1313 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1314            submatrix or null (possibly different for each row).
1315 
1316    Output Parameter:
1317 .  newmat - the matrix
1318 
1319    Notes:
1320    The AIJ format (also called the Yale sparse matrix format or
1321    compressed row storage) is fully compatible with standard Fortran 77
1322    storage.  That is, the stored row and column indices can begin at
1323    either one (as in Fortran) or zero.  See the users manual for details.
1324 
1325    The user MUST specify either the local or global matrix dimensions
1326    (possibly both).
1327 
1328    Storage Information:
1329    For a square global matrix we define each processor's diagonal portion
1330    to be its local rows and the corresponding columns (a square submatrix);
1331    each processor's off-diagonal portion encompasses the remainder of the
1332    local matrix (a rectangular submatrix).
1333 
1334    The user can specify preallocated storage for the diagonal part of
1335    the local submatrix with either d_nz or d_nnz (not both). Set both
1336    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1337    Likewise, specify preallocated storage for the off-diagonal part of
1338    the local submatrix with o_nz or o_nnz (not both).
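
   Example:
   An added sketch (not from the original page); each processor supplies
   its local dimensions and lets PETSc compute the global sizes, with
   roughly 5 nonzeros preallocated per diagonal row and 2 per
   off-diagonal row:

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,
                             5,0,2,0,&A); CHKERRQ(ierr);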
1339 
1340 .keywords: matrix, aij, compressed row, sparse, parallel
1341 
1342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1343 @*/
1344 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1345                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1346 {
1347   Mat          mat;
1348   Mat_MPIAIJ   *a;
1349   int          ierr, i,sum[2],work[2];
1350 
1351   *newmat         = 0;
1352   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1353   PLogObjectCreate(mat);
1354   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1355   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1356   mat->destroy    = MatDestroy_MPIAIJ;
1357   mat->view       = MatView_MPIAIJ;
1358   mat->factor     = 0;
1359 
1360   a->insertmode = NOT_SET_VALUES;
1361   MPI_Comm_rank(comm,&a->rank);
1362   MPI_Comm_size(comm,&a->size);
1363 
1364   if (m == PETSC_DECIDE && (d_nnz || o_nnz))
1365     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1366 
1367   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1368     work[0] = m; work[1] = n;
1369     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1370     if (M == PETSC_DECIDE) M = sum[0];
1371     if (N == PETSC_DECIDE) N = sum[1];
1372   }
1373   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1374   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1375   a->m = m;
1376   a->n = n;
1377   a->N = N;
1378   a->M = M;
1379 
1380   /* build local table of row and column ownerships */
1381   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1382   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1383   a->cowners = a->rowners + a->size + 1;
1384   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1385   a->rowners[0] = 0;
1386   for ( i=2; i<=a->size; i++ ) {
1387     a->rowners[i] += a->rowners[i-1];
1388   }
1389   a->rstart = a->rowners[a->rank];
1390   a->rend   = a->rowners[a->rank+1];
1391   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1392   a->cowners[0] = 0;
1393   for ( i=2; i<=a->size; i++ ) {
1394     a->cowners[i] += a->cowners[i-1];
1395   }
1396   a->cstart = a->cowners[a->rank];
1397   a->cend   = a->cowners[a->rank+1];
1398 
1399   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1400   PLogObjectParent(mat,a->A);
1401   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1402   PLogObjectParent(mat,a->B);
1403 
1404   /* build cache for off array entries formed */
1405   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1406   a->colmap    = 0;
1407   a->garray    = 0;
1408 
1409   /* data structures used for matrix-vector multiply (local work vector and scatter context) */
1410   a->lvec      = 0;
1411   a->Mvctx     = 0;
1412   a->assembled = 0;
1413 
1414   *newmat = mat;
1415   return 0;
1416 }
1417 
1418 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1419 {
1420   Mat        mat;
1421   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1422   int        ierr, len;
1423 
1424   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1425   *newmat       = 0;
1426   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1427   PLogObjectCreate(mat);
1428   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1429   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1430   mat->destroy  = MatDestroy_MPIAIJ;
1431   mat->view     = MatView_MPIAIJ;
1432   mat->factor   = matin->factor;
1433 
1434   a->m          = oldmat->m;
1435   a->n          = oldmat->n;
1436   a->M          = oldmat->M;
1437   a->N          = oldmat->N;
1438 
1439   a->assembled  = 1;
1440   a->rstart     = oldmat->rstart;
1441   a->rend       = oldmat->rend;
1442   a->cstart     = oldmat->cstart;
1443   a->cend       = oldmat->cend;
1444   a->size       = oldmat->size;
1445   a->rank       = oldmat->rank;
1446   a->insertmode = NOT_SET_VALUES;
1447 
1448   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1449   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1450   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1451   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1452   if (oldmat->colmap) {
1453     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1454     PLogObjectMemory(mat,(a->N)*sizeof(int));
1455     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1456   } else a->colmap = 0;
1457   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1458     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1459     PLogObjectMemory(mat,len*sizeof(int));
1460     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1461   } else a->garray = 0;
1462 
1463   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1464   PLogObjectParent(mat,a->lvec);
1465   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1466   PLogObjectParent(mat,a->Mvctx);
1467   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1468   PLogObjectParent(mat,a->A);
1469   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1470   PLogObjectParent(mat,a->B);
1471   *newmat = mat;
1472   return 0;
1473 }
1474 
1475 #include "sysio.h"
1476 
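/*
   MatLoad_MPIAIJorMPIRow - Loads a matrix stored in PETSc binary format
   into an MPIAIJ or MPIROW matrix.  Processor 0 reads the whole file and
   redistributes the pieces: row lengths via MPI_Scatterv, column indices
   and numerical values via point-to-point messages.  The assumed file
   layout is

      int    header[4] = {MAT_COOKIE, M (rows), N (columns),
                          total number of nonzeros (not used here)}
      int    row lengths, one per global row
      int    column indices for all nonzeros, stored row by row
      Scalar numerical values for all nonzeros, stored row by row
*/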
1477 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1478 {
1479   Mat          A;
1480   int          i, nz, ierr, j,rstart, rend, fd;
1481   Scalar       *vals,*svals;
1482   PetscObject  vobj = (PetscObject) bview;
1483   MPI_Comm     comm = vobj->comm;
1484   MPI_Status   status;
1485   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1486   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1487   int          tag = ((PetscObject)bview)->tag;
1488 
1489   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1490   if (!rank) {
1491     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1492     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1493     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1494   }
1495 
1496   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1497   M = header[1]; N = header[2];
1498   /* determine ownership of all rows */
1499   m = M/size + ((M % size) > rank);
1500   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1501   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1502   rowners[0] = 0;
1503   for ( i=2; i<=size; i++ ) {
1504     rowners[i] += rowners[i-1];
1505   }
1506   rstart = rowners[rank];
1507   rend   = rowners[rank+1];
1508 
1509   /* distribute row lengths to all processors */
1510   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1511   offlens = ourlens + (rend-rstart);
1512   if (!rank) {
1513     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1514     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1515     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1516     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1517     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1518     PetscFree(sndcounts);
1519   }
1520   else {
1521     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1522   }
1523 
1524   if (!rank) {
1525     /* calculate the number of nonzeros on each processor */
1526     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1527     PetscMemzero(procsnz,size*sizeof(int));
1528     for ( i=0; i<size; i++ ) {
1529       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1530         procsnz[i] += rowlengths[j];
1531       }
1532     }
1533     PetscFree(rowlengths);
1534 
1535     /* determine max buffer needed and allocate it */
1536     maxnz = 0;
1537     for ( i=0; i<size; i++ ) {
1538       maxnz = PetscMax(maxnz,procsnz[i]);
1539     }
1540     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1541 
1542     /* read in my part of the matrix column indices  */
1543     nz = procsnz[0];
1544     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1545     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1546 
1547     /* read in everyone else's column indices and ship them off */
1548     for ( i=1; i<size; i++ ) {
1549       nz = procsnz[i];
1550       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1551       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1552     }
1553     PetscFree(cols);
1554   }
1555   else {
1556     /* determine buffer space needed for message */
1557     nz = 0;
1558     for ( i=0; i<m; i++ ) {
1559       nz += ourlens[i];
1560     }
1561     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1562 
1563     /* receive message of column indices */
1564     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1565     MPI_Get_count(&status,MPI_INT,&maxnz);
1566     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1567   }
1568 
1569   /* loop over local rows, counting off-diagonal entries (columns outside the local ownership range [rstart,rend)) */
1570   PetscMemzero(offlens,m*sizeof(int));
1571   jj = 0;
1572   for ( i=0; i<m; i++ ) {
1573     for ( j=0; j<ourlens[i]; j++ ) {
1574       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1575       jj++;
1576     }
1577   }
1578 
1579   /* create our matrix: the subtraction below leaves ourlens[] holding the diagonal-block row lengths and offlens[] the off-diagonal lengths, giving exact preallocation */
1580   for ( i=0; i<m; i++ ) {
1581     ourlens[i] -= offlens[i];
1582   }
1583   if (type == MATMPIAIJ) {
1584     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1585   }
1586   else if (type == MATMPIROW) {
1587     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1588   }
1589   A = *newmat;
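  /* the column indices within each row are assumed to be in ascending
     order (as stored in the file), which lets MatSetValues use a faster
     insertion path */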
1590   MatSetOption(A,COLUMNS_SORTED);
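  /* restore ourlens[] to the full row lengths for the insertion loops below */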
1591   for ( i=0; i<m; i++ ) {
1592     ourlens[i] += offlens[i];
1593   }
1594 
1595   if (!rank) {
1596     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1597 
1598     /* read in my part of the matrix numerical values  */
1599     nz = procsnz[0];
1600     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1601 
1602     /* insert into matrix */
1603     jj      = rstart;
1604     smycols = mycols;
1605     svals   = vals;
1606     for ( i=0; i<m; i++ ) {
1607       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1608       smycols += ourlens[i];
1609       svals   += ourlens[i];
1610       jj++;
1611     }
1612 
1613     /* read in the other processors' values and ship them out */
1614     for ( i=1; i<size; i++ ) {
1615       nz = procsnz[i];
1616       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1617       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1618     }
1619     PetscFree(procsnz);
1620   }
1621   else {
1622     /* receive numeric values */
1623     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1624 
1625     /* receive message of values */
1626     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1627     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1628     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1629 
1630     /* insert into matrix */
1631     jj      = rstart;
1632     smycols = mycols;
1633     svals   = vals;
1634     for ( i=0; i<m; i++ ) {
1635       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1636       smycols += ourlens[i];
1637       svals   += ourlens[i];
1638       jj++;
1639     }
1640   }
1641   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1642 
1643   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1644   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1645   return 0;
1646 }
1647