xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 8965ea79aaf4087e157537e1c4a1c2d61645c0b7)
1*8965ea79SLois Curfman McInnes #ifndef lint
2*8965ea79SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.90 1995/10/17 02:21:15 curfman Exp curfman $";
3*8965ea79SLois Curfman McInnes #endif
4*8965ea79SLois Curfman McInnes 
5*8965ea79SLois Curfman McInnes #include "mpidense.h"
6*8965ea79SLois Curfman McInnes #include "vec/vecimpl.h"
7*8965ea79SLois Curfman McInnes #include "inline/spops.h"
8*8965ea79SLois Curfman McInnes 
9*8965ea79SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
10*8965ea79SLois Curfman McInnes                                int *idxn,Scalar *v,InsertMode addv)
11*8965ea79SLois Curfman McInnes {
12*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13*8965ea79SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
14*8965ea79SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
15*8965ea79SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
16*8965ea79SLois Curfman McInnes   int        shift = C->indexshift;
17*8965ea79SLois Curfman McInnes 
18*8965ea79SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
19*8965ea79SLois Curfman McInnes     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
20*8965ea79SLois Curfman McInnes   }
21*8965ea79SLois Curfman McInnes   aij->insertmode = addv;
22*8965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
23*8965ea79SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
24*8965ea79SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
25*8965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
26*8965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
27*8965ea79SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
28*8965ea79SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
29*8965ea79SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
30*8965ea79SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
31*8965ea79SLois Curfman McInnes           col = idxn[j] - cstart;
32*8965ea79SLois Curfman McInnes           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
33*8965ea79SLois Curfman McInnes         }
34*8965ea79SLois Curfman McInnes         else {
35*8965ea79SLois Curfman McInnes           if (aij->assembled) {
36*8965ea79SLois Curfman McInnes             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
37*8965ea79SLois Curfman McInnes             col = aij->colmap[idxn[j]] + shift;
38*8965ea79SLois Curfman McInnes             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
39*8965ea79SLois Curfman McInnes               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
40*8965ea79SLois Curfman McInnes               col =  idxn[j];
41*8965ea79SLois Curfman McInnes             }
42*8965ea79SLois Curfman McInnes           }
43*8965ea79SLois Curfman McInnes           else col = idxn[j];
44*8965ea79SLois Curfman McInnes           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
45*8965ea79SLois Curfman McInnes         }
46*8965ea79SLois Curfman McInnes       }
47*8965ea79SLois Curfman McInnes     }
48*8965ea79SLois Curfman McInnes     else {
49*8965ea79SLois Curfman McInnes       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
50*8965ea79SLois Curfman McInnes     }
51*8965ea79SLois Curfman McInnes   }
52*8965ea79SLois Curfman McInnes   return 0;
53*8965ea79SLois Curfman McInnes }
54*8965ea79SLois Curfman McInnes 
55*8965ea79SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
56*8965ea79SLois Curfman McInnes {
57*8965ea79SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
58*8965ea79SLois Curfman McInnes   MPI_Comm    comm = mat->comm;
59*8965ea79SLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
60*8965ea79SLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
61*8965ea79SLois Curfman McInnes   MPI_Request *send_waits,*recv_waits;
62*8965ea79SLois Curfman McInnes   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
63*8965ea79SLois Curfman McInnes   InsertMode  addv;
64*8965ea79SLois Curfman McInnes   Scalar      *rvalues,*svalues;
65*8965ea79SLois Curfman McInnes 
66*8965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
67*8965ea79SLois Curfman McInnes   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
68*8965ea79SLois Curfman McInnes   if (addv == (ADD_VALUES|INSERT_VALUES)) {
69*8965ea79SLois Curfman McInnes     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
70*8965ea79SLois Curfman McInnes   }
71*8965ea79SLois Curfman McInnes   aij->insertmode = addv; /* in case this processor had no cache */
72*8965ea79SLois Curfman McInnes 
73*8965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
74*8965ea79SLois Curfman McInnes   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
75*8965ea79SLois Curfman McInnes   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
76*8965ea79SLois Curfman McInnes   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
77*8965ea79SLois Curfman McInnes   for ( i=0; i<aij->stash.n; i++ ) {
78*8965ea79SLois Curfman McInnes     idx = aij->stash.idx[i];
79*8965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
80*8965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
81*8965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
82*8965ea79SLois Curfman McInnes       }
83*8965ea79SLois Curfman McInnes     }
84*8965ea79SLois Curfman McInnes   }
85*8965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
86*8965ea79SLois Curfman McInnes 
87*8965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
88*8965ea79SLois Curfman McInnes   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
89*8965ea79SLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
90*8965ea79SLois Curfman McInnes   nreceives = work[rank];
91*8965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
92*8965ea79SLois Curfman McInnes   nmax = work[rank];
93*8965ea79SLois Curfman McInnes   PETSCFREE(work);
94*8965ea79SLois Curfman McInnes 
95*8965ea79SLois Curfman McInnes   /* post receives:
96*8965ea79SLois Curfman McInnes        1) each message will consist of ordered pairs
97*8965ea79SLois Curfman McInnes      (global index,value) we store the global index as a double
98*8965ea79SLois Curfman McInnes      to simplify the message passing.
99*8965ea79SLois Curfman McInnes        2) since we don't know how long each individual message is we
100*8965ea79SLois Curfman McInnes      allocate the largest needed buffer for each receive. Potentially
101*8965ea79SLois Curfman McInnes      this is a lot of wasted space.
102*8965ea79SLois Curfman McInnes 
103*8965ea79SLois Curfman McInnes 
104*8965ea79SLois Curfman McInnes        This could be done better.
105*8965ea79SLois Curfman McInnes   */
106*8965ea79SLois Curfman McInnes   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
107*8965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
108*8965ea79SLois Curfman McInnes   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
109*8965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
110*8965ea79SLois Curfman McInnes   for ( i=0; i<nreceives; i++ ) {
111*8965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
112*8965ea79SLois Curfman McInnes               comm,recv_waits+i);
113*8965ea79SLois Curfman McInnes   }
114*8965ea79SLois Curfman McInnes 
115*8965ea79SLois Curfman McInnes   /* do sends:
116*8965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
117*8965ea79SLois Curfman McInnes          the ith processor
118*8965ea79SLois Curfman McInnes   */
119*8965ea79SLois Curfman McInnes   svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
120*8965ea79SLois Curfman McInnes   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
121*8965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
122*8965ea79SLois Curfman McInnes   starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
123*8965ea79SLois Curfman McInnes   starts[0] = 0;
124*8965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
125*8965ea79SLois Curfman McInnes   for ( i=0; i<aij->stash.n; i++ ) {
126*8965ea79SLois Curfman McInnes     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
127*8965ea79SLois Curfman McInnes     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
128*8965ea79SLois Curfman McInnes     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
129*8965ea79SLois Curfman McInnes   }
130*8965ea79SLois Curfman McInnes   PETSCFREE(owner);
131*8965ea79SLois Curfman McInnes   starts[0] = 0;
132*8965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
133*8965ea79SLois Curfman McInnes   count = 0;
134*8965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
135*8965ea79SLois Curfman McInnes     if (procs[i]) {
136*8965ea79SLois Curfman McInnes       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
137*8965ea79SLois Curfman McInnes                 comm,send_waits+count++);
138*8965ea79SLois Curfman McInnes     }
139*8965ea79SLois Curfman McInnes   }
140*8965ea79SLois Curfman McInnes   PETSCFREE(starts); PETSCFREE(nprocs);
141*8965ea79SLois Curfman McInnes 
142*8965ea79SLois Curfman McInnes   /* Free cache space */
143*8965ea79SLois Curfman McInnes   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
144*8965ea79SLois Curfman McInnes 
145*8965ea79SLois Curfman McInnes   aij->svalues    = svalues;    aij->rvalues    = rvalues;
146*8965ea79SLois Curfman McInnes   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
147*8965ea79SLois Curfman McInnes   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
148*8965ea79SLois Curfman McInnes   aij->rmax       = nmax;
149*8965ea79SLois Curfman McInnes 
150*8965ea79SLois Curfman McInnes   return 0;
151*8965ea79SLois Curfman McInnes }
152*8965ea79SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
153*8965ea79SLois Curfman McInnes 
154*8965ea79SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
155*8965ea79SLois Curfman McInnes {
156*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
157*8965ea79SLois Curfman McInnes   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
158*8965ea79SLois Curfman McInnes   MPI_Status  *send_status,recv_status;
159*8965ea79SLois Curfman McInnes   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
160*8965ea79SLois Curfman McInnes   int         row,col,other_disassembled,shift = C->indexshift;
161*8965ea79SLois Curfman McInnes   Scalar      *values,val;
162*8965ea79SLois Curfman McInnes   InsertMode  addv = aij->insertmode;
163*8965ea79SLois Curfman McInnes 
164*8965ea79SLois Curfman McInnes   /*  wait on receives */
165*8965ea79SLois Curfman McInnes   while (count) {
166*8965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
167*8965ea79SLois Curfman McInnes     /* unpack receives into our local space */
168*8965ea79SLois Curfman McInnes     values = aij->rvalues + 3*imdex*aij->rmax;
169*8965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
170*8965ea79SLois Curfman McInnes     n = n/3;
171*8965ea79SLois Curfman McInnes     for ( i=0; i<n; i++ ) {
172*8965ea79SLois Curfman McInnes       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
173*8965ea79SLois Curfman McInnes       col = (int) PETSCREAL(values[3*i+1]);
174*8965ea79SLois Curfman McInnes       val = values[3*i+2];
175*8965ea79SLois Curfman McInnes       if (col >= aij->cstart && col < aij->cend) {
176*8965ea79SLois Curfman McInnes         col -= aij->cstart;
177*8965ea79SLois Curfman McInnes         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
178*8965ea79SLois Curfman McInnes       }
179*8965ea79SLois Curfman McInnes       else {
180*8965ea79SLois Curfman McInnes         if (aij->assembled) {
181*8965ea79SLois Curfman McInnes           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
182*8965ea79SLois Curfman McInnes           col = aij->colmap[col] + shift;
183*8965ea79SLois Curfman McInnes           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
184*8965ea79SLois Curfman McInnes             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
185*8965ea79SLois Curfman McInnes             col = (int) PETSCREAL(values[3*i+1]);
186*8965ea79SLois Curfman McInnes           }
187*8965ea79SLois Curfman McInnes         }
188*8965ea79SLois Curfman McInnes         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
189*8965ea79SLois Curfman McInnes       }
190*8965ea79SLois Curfman McInnes     }
191*8965ea79SLois Curfman McInnes     count--;
192*8965ea79SLois Curfman McInnes   }
193*8965ea79SLois Curfman McInnes   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
194*8965ea79SLois Curfman McInnes 
195*8965ea79SLois Curfman McInnes   /* wait on sends */
196*8965ea79SLois Curfman McInnes   if (aij->nsends) {
197*8965ea79SLois Curfman McInnes     send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
198*8965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
199*8965ea79SLois Curfman McInnes     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
200*8965ea79SLois Curfman McInnes     PETSCFREE(send_status);
201*8965ea79SLois Curfman McInnes   }
202*8965ea79SLois Curfman McInnes   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
203*8965ea79SLois Curfman McInnes 
204*8965ea79SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
205*8965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
206*8965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
207*8965ea79SLois Curfman McInnes 
208*8965ea79SLois Curfman McInnes   /* determine if any processor has disassembled, if so we must
209*8965ea79SLois Curfman McInnes      also disassemble ourselfs, in order that we may reassemble. */
210*8965ea79SLois Curfman McInnes   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
211*8965ea79SLois Curfman McInnes   if (aij->assembled && !other_disassembled) {
212*8965ea79SLois Curfman McInnes     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
213*8965ea79SLois Curfman McInnes   }
214*8965ea79SLois Curfman McInnes 
215*8965ea79SLois Curfman McInnes   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
216*8965ea79SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
217*8965ea79SLois Curfman McInnes     aij->assembled = 1;
218*8965ea79SLois Curfman McInnes   }
219*8965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
220*8965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
221*8965ea79SLois Curfman McInnes 
222*8965ea79SLois Curfman McInnes   return 0;
223*8965ea79SLois Curfman McInnes }
224*8965ea79SLois Curfman McInnes 
225*8965ea79SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
226*8965ea79SLois Curfman McInnes {
227*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
228*8965ea79SLois Curfman McInnes   int        ierr;
229*8965ea79SLois Curfman McInnes   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
230*8965ea79SLois Curfman McInnes   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
231*8965ea79SLois Curfman McInnes   return 0;
232*8965ea79SLois Curfman McInnes }
233*8965ea79SLois Curfman McInnes 
234*8965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
235*8965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
236*8965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
237*8965ea79SLois Curfman McInnes    aij->A and aij->B directly and not through the MatZeroRows()
238*8965ea79SLois Curfman McInnes    routine.
239*8965ea79SLois Curfman McInnes */
240*8965ea79SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
241*8965ea79SLois Curfman McInnes {
242*8965ea79SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
243*8965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
244*8965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
245*8965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
246*8965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
247*8965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
248*8965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
249*8965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
250*8965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
251*8965ea79SLois Curfman McInnes   IS             istmp;
252*8965ea79SLois Curfman McInnes 
253*8965ea79SLois Curfman McInnes   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
254*8965ea79SLois Curfman McInnes   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
255*8965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
256*8965ea79SLois Curfman McInnes 
257*8965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
258*8965ea79SLois Curfman McInnes   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
259*8965ea79SLois Curfman McInnes   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
260*8965ea79SLois Curfman McInnes   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
261*8965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
262*8965ea79SLois Curfman McInnes     idx = rows[i];
263*8965ea79SLois Curfman McInnes     found = 0;
264*8965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
265*8965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
266*8965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
267*8965ea79SLois Curfman McInnes       }
268*8965ea79SLois Curfman McInnes     }
269*8965ea79SLois Curfman McInnes     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
270*8965ea79SLois Curfman McInnes   }
271*8965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
272*8965ea79SLois Curfman McInnes 
273*8965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
274*8965ea79SLois Curfman McInnes   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
275*8965ea79SLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
276*8965ea79SLois Curfman McInnes   nrecvs = work[rank];
277*8965ea79SLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
278*8965ea79SLois Curfman McInnes   nmax = work[rank];
279*8965ea79SLois Curfman McInnes   PETSCFREE(work);
280*8965ea79SLois Curfman McInnes 
281*8965ea79SLois Curfman McInnes   /* post receives:   */
282*8965ea79SLois Curfman McInnes   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
283*8965ea79SLois Curfman McInnes   CHKPTRQ(rvalues);
284*8965ea79SLois Curfman McInnes   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
285*8965ea79SLois Curfman McInnes   CHKPTRQ(recv_waits);
286*8965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
287*8965ea79SLois Curfman McInnes     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
288*8965ea79SLois Curfman McInnes   }
289*8965ea79SLois Curfman McInnes 
290*8965ea79SLois Curfman McInnes   /* do sends:
291*8965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
292*8965ea79SLois Curfman McInnes          the ith processor
293*8965ea79SLois Curfman McInnes   */
294*8965ea79SLois Curfman McInnes   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
295*8965ea79SLois Curfman McInnes   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
296*8965ea79SLois Curfman McInnes   CHKPTRQ(send_waits);
297*8965ea79SLois Curfman McInnes   starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
298*8965ea79SLois Curfman McInnes   starts[0] = 0;
299*8965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
300*8965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
301*8965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
302*8965ea79SLois Curfman McInnes   }
303*8965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
304*8965ea79SLois Curfman McInnes 
305*8965ea79SLois Curfman McInnes   starts[0] = 0;
306*8965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
307*8965ea79SLois Curfman McInnes   count = 0;
308*8965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
309*8965ea79SLois Curfman McInnes     if (procs[i]) {
310*8965ea79SLois Curfman McInnes       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
311*8965ea79SLois Curfman McInnes     }
312*8965ea79SLois Curfman McInnes   }
313*8965ea79SLois Curfman McInnes   PETSCFREE(starts);
314*8965ea79SLois Curfman McInnes 
315*8965ea79SLois Curfman McInnes   base = owners[rank];
316*8965ea79SLois Curfman McInnes 
317*8965ea79SLois Curfman McInnes   /*  wait on receives */
318*8965ea79SLois Curfman McInnes   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
319*8965ea79SLois Curfman McInnes   source = lens + nrecvs;
320*8965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
321*8965ea79SLois Curfman McInnes   while (count) {
322*8965ea79SLois Curfman McInnes     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
323*8965ea79SLois Curfman McInnes     /* unpack receives into our local space */
324*8965ea79SLois Curfman McInnes     MPI_Get_count(&recv_status,MPI_INT,&n);
325*8965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
326*8965ea79SLois Curfman McInnes     lens[imdex]  = n;
327*8965ea79SLois Curfman McInnes     slen += n;
328*8965ea79SLois Curfman McInnes     count--;
329*8965ea79SLois Curfman McInnes   }
330*8965ea79SLois Curfman McInnes   PETSCFREE(recv_waits);
331*8965ea79SLois Curfman McInnes 
332*8965ea79SLois Curfman McInnes   /* move the data into the send scatter */
333*8965ea79SLois Curfman McInnes   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
334*8965ea79SLois Curfman McInnes   count = 0;
335*8965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
336*8965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
337*8965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
338*8965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
339*8965ea79SLois Curfman McInnes     }
340*8965ea79SLois Curfman McInnes   }
341*8965ea79SLois Curfman McInnes   PETSCFREE(rvalues); PETSCFREE(lens);
342*8965ea79SLois Curfman McInnes   PETSCFREE(owner); PETSCFREE(nprocs);
343*8965ea79SLois Curfman McInnes 
344*8965ea79SLois Curfman McInnes   /* actually zap the local rows */
345*8965ea79SLois Curfman McInnes   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
346*8965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
347*8965ea79SLois Curfman McInnes   PETSCFREE(lrows);
348*8965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
349*8965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
350*8965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp); CHKERRQ(ierr);
351*8965ea79SLois Curfman McInnes 
352*8965ea79SLois Curfman McInnes   /* wait on sends */
353*8965ea79SLois Curfman McInnes   if (nsends) {
354*8965ea79SLois Curfman McInnes     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
355*8965ea79SLois Curfman McInnes     CHKPTRQ(send_status);
356*8965ea79SLois Curfman McInnes     MPI_Waitall(nsends,send_waits,send_status);
357*8965ea79SLois Curfman McInnes     PETSCFREE(send_status);
358*8965ea79SLois Curfman McInnes   }
359*8965ea79SLois Curfman McInnes   PETSCFREE(send_waits); PETSCFREE(svalues);
360*8965ea79SLois Curfman McInnes 
361*8965ea79SLois Curfman McInnes   return 0;
362*8965ea79SLois Curfman McInnes }
363*8965ea79SLois Curfman McInnes 
364*8965ea79SLois Curfman McInnes static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
365*8965ea79SLois Curfman McInnes {
366*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
367*8965ea79SLois Curfman McInnes   int        ierr;
368*8965ea79SLois Curfman McInnes 
369*8965ea79SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
370*8965ea79SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
371*8965ea79SLois Curfman McInnes   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
372*8965ea79SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
373*8965ea79SLois Curfman McInnes   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
374*8965ea79SLois Curfman McInnes   return 0;
375*8965ea79SLois Curfman McInnes }
376*8965ea79SLois Curfman McInnes 
377*8965ea79SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
378*8965ea79SLois Curfman McInnes {
379*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
380*8965ea79SLois Curfman McInnes   int        ierr;
381*8965ea79SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
382*8965ea79SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
383*8965ea79SLois Curfman McInnes   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
384*8965ea79SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
385*8965ea79SLois Curfman McInnes   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
386*8965ea79SLois Curfman McInnes   return 0;
387*8965ea79SLois Curfman McInnes }
388*8965ea79SLois Curfman McInnes 
389*8965ea79SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
390*8965ea79SLois Curfman McInnes {
391*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
392*8965ea79SLois Curfman McInnes   int        ierr;
393*8965ea79SLois Curfman McInnes 
394*8965ea79SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
395*8965ea79SLois Curfman McInnes   /* do nondiagonal part */
396*8965ea79SLois Curfman McInnes   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
397*8965ea79SLois Curfman McInnes   /* send it on its way */
398*8965ea79SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
399*8965ea79SLois Curfman McInnes                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
400*8965ea79SLois Curfman McInnes   /* do local part */
401*8965ea79SLois Curfman McInnes   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
402*8965ea79SLois Curfman McInnes   /* receive remote parts: note this assumes the values are not actually */
403*8965ea79SLois Curfman McInnes   /* inserted in yy until the next line, which is true for my implementation*/
404*8965ea79SLois Curfman McInnes   /* but is not perhaps always true. */
405*8965ea79SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
406*8965ea79SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
407*8965ea79SLois Curfman McInnes   return 0;
408*8965ea79SLois Curfman McInnes }
409*8965ea79SLois Curfman McInnes 
410*8965ea79SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
411*8965ea79SLois Curfman McInnes {
412*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
413*8965ea79SLois Curfman McInnes   int        ierr;
414*8965ea79SLois Curfman McInnes 
415*8965ea79SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
416*8965ea79SLois Curfman McInnes   /* do nondiagonal part */
417*8965ea79SLois Curfman McInnes   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
418*8965ea79SLois Curfman McInnes   /* send it on its way */
419*8965ea79SLois Curfman McInnes   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
420*8965ea79SLois Curfman McInnes                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
421*8965ea79SLois Curfman McInnes   /* do local part */
422*8965ea79SLois Curfman McInnes   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
423*8965ea79SLois Curfman McInnes   /* receive remote parts: note this assumes the values are not actually */
424*8965ea79SLois Curfman McInnes   /* inserted in yy until the next line, which is true for my implementation*/
425*8965ea79SLois Curfman McInnes   /* but is not perhaps always true. */
426*8965ea79SLois Curfman McInnes   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
427*8965ea79SLois Curfman McInnes                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
428*8965ea79SLois Curfman McInnes   return 0;
429*8965ea79SLois Curfman McInnes }
430*8965ea79SLois Curfman McInnes 
431*8965ea79SLois Curfman McInnes /*
432*8965ea79SLois Curfman McInnes   This only works correctly for square matrices where the subblock A->A is the
433*8965ea79SLois Curfman McInnes    diagonal block
434*8965ea79SLois Curfman McInnes */
435*8965ea79SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
436*8965ea79SLois Curfman McInnes {
437*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
438*8965ea79SLois Curfman McInnes   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
439*8965ea79SLois Curfman McInnes   return MatGetDiagonal(a->A,v);
440*8965ea79SLois Curfman McInnes }
441*8965ea79SLois Curfman McInnes 
442*8965ea79SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
443*8965ea79SLois Curfman McInnes {
444*8965ea79SLois Curfman McInnes   Mat        mat = (Mat) obj;
445*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
446*8965ea79SLois Curfman McInnes   int        ierr;
447*8965ea79SLois Curfman McInnes #if defined(PETSC_LOG)
448*8965ea79SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
449*8965ea79SLois Curfman McInnes #endif
450*8965ea79SLois Curfman McInnes   PETSCFREE(aij->rowners);
451*8965ea79SLois Curfman McInnes   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
452*8965ea79SLois Curfman McInnes   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
453*8965ea79SLois Curfman McInnes   if (aij->colmap) PETSCFREE(aij->colmap);
454*8965ea79SLois Curfman McInnes   if (aij->garray) PETSCFREE(aij->garray);
455*8965ea79SLois Curfman McInnes   if (aij->lvec)   VecDestroy(aij->lvec);
456*8965ea79SLois Curfman McInnes   if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
457*8965ea79SLois Curfman McInnes   PETSCFREE(aij);
458*8965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
459*8965ea79SLois Curfman McInnes   PETSCHEADERDESTROY(mat);
460*8965ea79SLois Curfman McInnes   return 0;
461*8965ea79SLois Curfman McInnes }
462*8965ea79SLois Curfman McInnes #include "draw.h"
463*8965ea79SLois Curfman McInnes #include "pinclude/pviewer.h"
464*8965ea79SLois Curfman McInnes 
465*8965ea79SLois Curfman McInnes static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
466*8965ea79SLois Curfman McInnes {
467*8965ea79SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
468*8965ea79SLois Curfman McInnes   int         ierr;
469*8965ea79SLois Curfman McInnes 
470*8965ea79SLois Curfman McInnes   if (aij->size == 1) {
471*8965ea79SLois Curfman McInnes     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
472*8965ea79SLois Curfman McInnes   }
473*8965ea79SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
474*8965ea79SLois Curfman McInnes   return 0;
475*8965ea79SLois Curfman McInnes }
476*8965ea79SLois Curfman McInnes 
477*8965ea79SLois Curfman McInnes static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
478*8965ea79SLois Curfman McInnes {
479*8965ea79SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
480*8965ea79SLois Curfman McInnes   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
481*8965ea79SLois Curfman McInnes   int         ierr, format,shift = C->indexshift;
482*8965ea79SLois Curfman McInnes   PetscObject vobj = (PetscObject) viewer;
483*8965ea79SLois Curfman McInnes   FILE        *fd;
484*8965ea79SLois Curfman McInnes 
485*8965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
486*8965ea79SLois Curfman McInnes     ierr = ViewerFileGetFormat_Private(viewer,&format);
487*8965ea79SLois Curfman McInnes     if (format == FILE_FORMAT_INFO) {
488*8965ea79SLois Curfman McInnes       return 0;
489*8965ea79SLois Curfman McInnes     }
490*8965ea79SLois Curfman McInnes   }
491*8965ea79SLois Curfman McInnes 
492*8965ea79SLois Curfman McInnes   if (vobj->type == ASCII_FILE_VIEWER) {
493*8965ea79SLois Curfman McInnes     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
494*8965ea79SLois Curfman McInnes     MPIU_Seq_begin(mat->comm,1);
495*8965ea79SLois Curfman McInnes     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
496*8965ea79SLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
497*8965ea79SLois Curfman McInnes            aij->cend);
498*8965ea79SLois Curfman McInnes     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
499*8965ea79SLois Curfman McInnes     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
500*8965ea79SLois Curfman McInnes     fflush(fd);
501*8965ea79SLois Curfman McInnes     MPIU_Seq_end(mat->comm,1);
502*8965ea79SLois Curfman McInnes   }
503*8965ea79SLois Curfman McInnes   else {
504*8965ea79SLois Curfman McInnes     int size = aij->size, rank = aij->rank;
505*8965ea79SLois Curfman McInnes     if (size == 1) {
506*8965ea79SLois Curfman McInnes       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
507*8965ea79SLois Curfman McInnes     }
508*8965ea79SLois Curfman McInnes     else {
509*8965ea79SLois Curfman McInnes       /* assemble the entire matrix onto first processor. */
510*8965ea79SLois Curfman McInnes       Mat         A;
511*8965ea79SLois Curfman McInnes       Mat_SeqAIJ *Aloc;
512*8965ea79SLois Curfman McInnes       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
513*8965ea79SLois Curfman McInnes       Scalar      *a;
514*8965ea79SLois Curfman McInnes 
515*8965ea79SLois Curfman McInnes       if (!rank) {
516*8965ea79SLois Curfman McInnes         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
517*8965ea79SLois Curfman McInnes       }
518*8965ea79SLois Curfman McInnes       else {
519*8965ea79SLois Curfman McInnes         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
520*8965ea79SLois Curfman McInnes       }
521*8965ea79SLois Curfman McInnes       PLogObjectParent(mat,A);
522*8965ea79SLois Curfman McInnes 
523*8965ea79SLois Curfman McInnes 
524*8965ea79SLois Curfman McInnes       /* copy over the A part */
525*8965ea79SLois Curfman McInnes       Aloc = (Mat_SeqAIJ*) aij->A->data;
526*8965ea79SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
527*8965ea79SLois Curfman McInnes       row = aij->rstart;
528*8965ea79SLois Curfman McInnes       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
529*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
530*8965ea79SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
531*8965ea79SLois Curfman McInnes         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
532*8965ea79SLois Curfman McInnes       }
533*8965ea79SLois Curfman McInnes       aj = Aloc->j;
534*8965ea79SLois Curfman McInnes       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
535*8965ea79SLois Curfman McInnes 
536*8965ea79SLois Curfman McInnes       /* copy over the B part */
537*8965ea79SLois Curfman McInnes       Aloc = (Mat_SeqAIJ*) aij->B->data;
538*8965ea79SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
539*8965ea79SLois Curfman McInnes       row = aij->rstart;
540*8965ea79SLois Curfman McInnes       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
541*8965ea79SLois Curfman McInnes       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
542*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
543*8965ea79SLois Curfman McInnes         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
544*8965ea79SLois Curfman McInnes         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
545*8965ea79SLois Curfman McInnes       }
546*8965ea79SLois Curfman McInnes       PETSCFREE(ct);
547*8965ea79SLois Curfman McInnes       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
548*8965ea79SLois Curfman McInnes       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
549*8965ea79SLois Curfman McInnes       if (!rank) {
550*8965ea79SLois Curfman McInnes         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
551*8965ea79SLois Curfman McInnes       }
552*8965ea79SLois Curfman McInnes       ierr = MatDestroy(A); CHKERRQ(ierr);
553*8965ea79SLois Curfman McInnes     }
554*8965ea79SLois Curfman McInnes   }
555*8965ea79SLois Curfman McInnes   return 0;
556*8965ea79SLois Curfman McInnes }
557*8965ea79SLois Curfman McInnes 
558*8965ea79SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
559*8965ea79SLois Curfman McInnes {
560*8965ea79SLois Curfman McInnes   Mat         mat = (Mat) obj;
561*8965ea79SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
562*8965ea79SLois Curfman McInnes   int         ierr;
563*8965ea79SLois Curfman McInnes   PetscObject vobj = (PetscObject) viewer;
564*8965ea79SLois Curfman McInnes 
565*8965ea79SLois Curfman McInnes   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
566*8965ea79SLois Curfman McInnes   if (!viewer) {
567*8965ea79SLois Curfman McInnes     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
568*8965ea79SLois Curfman McInnes   }
569*8965ea79SLois Curfman McInnes   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
570*8965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
571*8965ea79SLois Curfman McInnes     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
572*8965ea79SLois Curfman McInnes   }
573*8965ea79SLois Curfman McInnes   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
574*8965ea79SLois Curfman McInnes     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
575*8965ea79SLois Curfman McInnes   }
576*8965ea79SLois Curfman McInnes   else if (vobj->cookie == DRAW_COOKIE) {
577*8965ea79SLois Curfman McInnes     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
578*8965ea79SLois Curfman McInnes   }
579*8965ea79SLois Curfman McInnes   else if (vobj->type == BINARY_FILE_VIEWER) {
580*8965ea79SLois Curfman McInnes     return MatView_MPIAIJ_Binary(mat,viewer);
581*8965ea79SLois Curfman McInnes   }
582*8965ea79SLois Curfman McInnes   return 0;
583*8965ea79SLois Curfman McInnes }
584*8965ea79SLois Curfman McInnes 
585*8965ea79SLois Curfman McInnes extern int MatMarkDiag_SeqAIJ(Mat);
586*8965ea79SLois Curfman McInnes /*
587*8965ea79SLois Curfman McInnes     This has to provide several versions.
588*8965ea79SLois Curfman McInnes 
589*8965ea79SLois Curfman McInnes      1) per sequential
590*8965ea79SLois Curfman McInnes      2) a) use only local smoothing updating outer values only once.
591*8965ea79SLois Curfman McInnes         b) local smoothing updating outer values each inner iteration
592*8965ea79SLois Curfman McInnes      3) color updating out values betwen colors.
593*8965ea79SLois Curfman McInnes */
594*8965ea79SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
595*8965ea79SLois Curfman McInnes                            double fshift,int its,Vec xx)
596*8965ea79SLois Curfman McInnes {
597*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
598*8965ea79SLois Curfman McInnes   Mat        AA = mat->A, BB = mat->B;
599*8965ea79SLois Curfman McInnes   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
600*8965ea79SLois Curfman McInnes   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
601*8965ea79SLois Curfman McInnes   int        ierr,*idx, *diag;
602*8965ea79SLois Curfman McInnes   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
603*8965ea79SLois Curfman McInnes   Vec        tt;
604*8965ea79SLois Curfman McInnes 
605*8965ea79SLois Curfman McInnes   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
606*8965ea79SLois Curfman McInnes 
607*8965ea79SLois Curfman McInnes   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
608*8965ea79SLois Curfman McInnes   xs = x + shift; /* shift by one for index start of 1 */
609*8965ea79SLois Curfman McInnes   ls = ls + shift;
610*8965ea79SLois Curfman McInnes   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
611*8965ea79SLois Curfman McInnes   diag = A->diag;
612*8965ea79SLois Curfman McInnes   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
613*8965ea79SLois Curfman McInnes     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
614*8965ea79SLois Curfman McInnes   }
615*8965ea79SLois Curfman McInnes   if (flag & SOR_EISENSTAT) {
616*8965ea79SLois Curfman McInnes     /* Let  A = L + U + D; where L is lower trianglar,
617*8965ea79SLois Curfman McInnes     U is upper triangular, E is diagonal; This routine applies
618*8965ea79SLois Curfman McInnes 
619*8965ea79SLois Curfman McInnes             (L + E)^{-1} A (U + E)^{-1}
620*8965ea79SLois Curfman McInnes 
621*8965ea79SLois Curfman McInnes     to a vector efficiently using Eisenstat's trick. This is for
622*8965ea79SLois Curfman McInnes     the case of SSOR preconditioner, so E is D/omega where omega
623*8965ea79SLois Curfman McInnes     is the relaxation factor.
624*8965ea79SLois Curfman McInnes     */
625*8965ea79SLois Curfman McInnes     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
626*8965ea79SLois Curfman McInnes     PLogObjectParent(matin,tt);
627*8965ea79SLois Curfman McInnes     VecGetArray(tt,&t);
628*8965ea79SLois Curfman McInnes     scale = (2.0/omega) - 1.0;
629*8965ea79SLois Curfman McInnes     /*  x = (E + U)^{-1} b */
630*8965ea79SLois Curfman McInnes     VecSet(&zero,mat->lvec);
631*8965ea79SLois Curfman McInnes     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
632*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
633*8965ea79SLois Curfman McInnes     for ( i=m-1; i>-1; i-- ) {
634*8965ea79SLois Curfman McInnes       n    = A->i[i+1] - diag[i] - 1;
635*8965ea79SLois Curfman McInnes       idx  = A->j + diag[i] + !shift;
636*8965ea79SLois Curfman McInnes       v    = A->a + diag[i] + !shift;
637*8965ea79SLois Curfman McInnes       sum  = b[i];
638*8965ea79SLois Curfman McInnes       SPARSEDENSEMDOT(sum,xs,v,idx,n);
639*8965ea79SLois Curfman McInnes       d    = fshift + A->a[diag[i]+shift];
640*8965ea79SLois Curfman McInnes       n    = B->i[i+1] - B->i[i];
641*8965ea79SLois Curfman McInnes       idx  = B->j + B->i[i] + shift;
642*8965ea79SLois Curfman McInnes       v    = B->a + B->i[i] + shift;
643*8965ea79SLois Curfman McInnes       SPARSEDENSEMDOT(sum,ls,v,idx,n);
644*8965ea79SLois Curfman McInnes       x[i] = omega*(sum/d);
645*8965ea79SLois Curfman McInnes     }
646*8965ea79SLois Curfman McInnes     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
647*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
648*8965ea79SLois Curfman McInnes 
649*8965ea79SLois Curfman McInnes     /*  t = b - (2*E - D)x */
650*8965ea79SLois Curfman McInnes     v = A->a;
651*8965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
652*8965ea79SLois Curfman McInnes 
653*8965ea79SLois Curfman McInnes     /*  t = (E + L)^{-1}t */
654*8965ea79SLois Curfman McInnes     ts = t + shift; /* shifted by one for index start of a or mat->j*/
655*8965ea79SLois Curfman McInnes     diag = A->diag;
656*8965ea79SLois Curfman McInnes     VecSet(&zero,mat->lvec);
657*8965ea79SLois Curfman McInnes     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
658*8965ea79SLois Curfman McInnes                                                  mat->Mvctx); CHKERRQ(ierr);
659*8965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
660*8965ea79SLois Curfman McInnes       n    = diag[i] - A->i[i];
661*8965ea79SLois Curfman McInnes       idx  = A->j + A->i[i] + shift;
662*8965ea79SLois Curfman McInnes       v    = A->a + A->i[i] + shift;
663*8965ea79SLois Curfman McInnes       sum  = t[i];
664*8965ea79SLois Curfman McInnes       SPARSEDENSEMDOT(sum,ts,v,idx,n);
665*8965ea79SLois Curfman McInnes       d    = fshift + A->a[diag[i]+shift];
666*8965ea79SLois Curfman McInnes       n    = B->i[i+1] - B->i[i];
667*8965ea79SLois Curfman McInnes       idx  = B->j + B->i[i] + shift;
668*8965ea79SLois Curfman McInnes       v    = B->a + B->i[i] + shift;
669*8965ea79SLois Curfman McInnes       SPARSEDENSEMDOT(sum,ls,v,idx,n);
670*8965ea79SLois Curfman McInnes       t[i] = omega*(sum/d);
671*8965ea79SLois Curfman McInnes     }
672*8965ea79SLois Curfman McInnes     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
673*8965ea79SLois Curfman McInnes                                                     mat->Mvctx); CHKERRQ(ierr);
674*8965ea79SLois Curfman McInnes     /*  x = x + t */
675*8965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
676*8965ea79SLois Curfman McInnes     VecDestroy(tt);
677*8965ea79SLois Curfman McInnes     return 0;
678*8965ea79SLois Curfman McInnes   }
679*8965ea79SLois Curfman McInnes 
680*8965ea79SLois Curfman McInnes 
681*8965ea79SLois Curfman McInnes   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
682*8965ea79SLois Curfman McInnes     if (flag & SOR_ZERO_INITIAL_GUESS) {
683*8965ea79SLois Curfman McInnes       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
684*8965ea79SLois Curfman McInnes     }
685*8965ea79SLois Curfman McInnes     else {
686*8965ea79SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
687*8965ea79SLois Curfman McInnes       CHKERRQ(ierr);
688*8965ea79SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
689*8965ea79SLois Curfman McInnes       CHKERRQ(ierr);
690*8965ea79SLois Curfman McInnes     }
691*8965ea79SLois Curfman McInnes     while (its--) {
692*8965ea79SLois Curfman McInnes       /* go down through the rows */
693*8965ea79SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
694*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
695*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
696*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
697*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
698*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
699*8965ea79SLois Curfman McInnes         sum  = b[i];
700*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
701*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
702*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
703*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
704*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
705*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
706*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
707*8965ea79SLois Curfman McInnes       }
708*8965ea79SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
709*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
710*8965ea79SLois Curfman McInnes       /* come up through the rows */
711*8965ea79SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
712*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
713*8965ea79SLois Curfman McInnes       for ( i=m-1; i>-1; i-- ) {
714*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
715*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
716*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
717*8965ea79SLois Curfman McInnes         sum  = b[i];
718*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
719*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
720*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
721*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
722*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
723*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
724*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
725*8965ea79SLois Curfman McInnes       }
726*8965ea79SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
727*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
728*8965ea79SLois Curfman McInnes     }
729*8965ea79SLois Curfman McInnes   }
730*8965ea79SLois Curfman McInnes   else if (flag & SOR_FORWARD_SWEEP){
731*8965ea79SLois Curfman McInnes     if (flag & SOR_ZERO_INITIAL_GUESS) {
732*8965ea79SLois Curfman McInnes       VecSet(&zero,mat->lvec);
733*8965ea79SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
734*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
735*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
736*8965ea79SLois Curfman McInnes         n    = diag[i] - A->i[i];
737*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
738*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
739*8965ea79SLois Curfman McInnes         sum  = b[i];
740*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
741*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
742*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
743*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
744*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
745*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
746*8965ea79SLois Curfman McInnes         x[i] = omega*(sum/d);
747*8965ea79SLois Curfman McInnes       }
748*8965ea79SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
749*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
750*8965ea79SLois Curfman McInnes       its--;
751*8965ea79SLois Curfman McInnes     }
752*8965ea79SLois Curfman McInnes     while (its--) {
753*8965ea79SLois Curfman McInnes       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
754*8965ea79SLois Curfman McInnes       CHKERRQ(ierr);
755*8965ea79SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
756*8965ea79SLois Curfman McInnes       CHKERRQ(ierr);
757*8965ea79SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
758*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
759*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
760*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
761*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
762*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
763*8965ea79SLois Curfman McInnes         sum  = b[i];
764*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
765*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
766*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
767*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
768*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
769*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
770*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
771*8965ea79SLois Curfman McInnes       }
772*8965ea79SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
773*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
774*8965ea79SLois Curfman McInnes     }
775*8965ea79SLois Curfman McInnes   }
776*8965ea79SLois Curfman McInnes   else if (flag & SOR_BACKWARD_SWEEP){
777*8965ea79SLois Curfman McInnes     if (flag & SOR_ZERO_INITIAL_GUESS) {
778*8965ea79SLois Curfman McInnes       VecSet(&zero,mat->lvec);
779*8965ea79SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
780*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
781*8965ea79SLois Curfman McInnes       for ( i=m-1; i>-1; i-- ) {
782*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - diag[i] - 1;
783*8965ea79SLois Curfman McInnes         idx  = A->j + diag[i] + !shift;
784*8965ea79SLois Curfman McInnes         v    = A->a + diag[i] + !shift;
785*8965ea79SLois Curfman McInnes         sum  = b[i];
786*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
787*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
788*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
789*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
790*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
791*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
792*8965ea79SLois Curfman McInnes         x[i] = omega*(sum/d);
793*8965ea79SLois Curfman McInnes       }
794*8965ea79SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
795*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
796*8965ea79SLois Curfman McInnes       its--;
797*8965ea79SLois Curfman McInnes     }
798*8965ea79SLois Curfman McInnes     while (its--) {
799*8965ea79SLois Curfman McInnes       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
800*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
801*8965ea79SLois Curfman McInnes       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
802*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
803*8965ea79SLois Curfman McInnes       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
804*8965ea79SLois Curfman McInnes                               mat->Mvctx); CHKERRQ(ierr);
805*8965ea79SLois Curfman McInnes       for ( i=m-1; i>-1; i-- ) {
806*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
807*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
808*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
809*8965ea79SLois Curfman McInnes         sum  = b[i];
810*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
811*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
812*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
813*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
814*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
815*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
816*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
817*8965ea79SLois Curfman McInnes       }
818*8965ea79SLois Curfman McInnes       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
819*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
820*8965ea79SLois Curfman McInnes     }
821*8965ea79SLois Curfman McInnes   }
822*8965ea79SLois Curfman McInnes   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
823*8965ea79SLois Curfman McInnes     if (flag & SOR_ZERO_INITIAL_GUESS) {
824*8965ea79SLois Curfman McInnes       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
825*8965ea79SLois Curfman McInnes     }
826*8965ea79SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
827*8965ea79SLois Curfman McInnes     CHKERRQ(ierr);
828*8965ea79SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
829*8965ea79SLois Curfman McInnes     CHKERRQ(ierr);
830*8965ea79SLois Curfman McInnes     while (its--) {
831*8965ea79SLois Curfman McInnes       /* go down through the rows */
832*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
833*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
834*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
835*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
836*8965ea79SLois Curfman McInnes         sum  = b[i];
837*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
838*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
839*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
840*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
841*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
842*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
843*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
844*8965ea79SLois Curfman McInnes       }
845*8965ea79SLois Curfman McInnes       /* come up through the rows */
846*8965ea79SLois Curfman McInnes       for ( i=m-1; i>-1; i-- ) {
847*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
848*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
849*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
850*8965ea79SLois Curfman McInnes         sum  = b[i];
851*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
852*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
853*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
854*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
855*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
856*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
857*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
858*8965ea79SLois Curfman McInnes       }
859*8965ea79SLois Curfman McInnes     }
860*8965ea79SLois Curfman McInnes   }
861*8965ea79SLois Curfman McInnes   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
862*8965ea79SLois Curfman McInnes     if (flag & SOR_ZERO_INITIAL_GUESS) {
863*8965ea79SLois Curfman McInnes       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
864*8965ea79SLois Curfman McInnes     }
865*8965ea79SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
866*8965ea79SLois Curfman McInnes     CHKERRQ(ierr);
867*8965ea79SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
868*8965ea79SLois Curfman McInnes     CHKERRQ(ierr);
869*8965ea79SLois Curfman McInnes     while (its--) {
870*8965ea79SLois Curfman McInnes       for ( i=0; i<m; i++ ) {
871*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
872*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
873*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
874*8965ea79SLois Curfman McInnes         sum  = b[i];
875*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
876*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
877*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
878*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
879*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
880*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
881*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
882*8965ea79SLois Curfman McInnes       }
883*8965ea79SLois Curfman McInnes     }
884*8965ea79SLois Curfman McInnes   }
885*8965ea79SLois Curfman McInnes   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
886*8965ea79SLois Curfman McInnes     if (flag & SOR_ZERO_INITIAL_GUESS) {
887*8965ea79SLois Curfman McInnes       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
888*8965ea79SLois Curfman McInnes     }
889*8965ea79SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
890*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
891*8965ea79SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
892*8965ea79SLois Curfman McInnes                             mat->Mvctx); CHKERRQ(ierr);
893*8965ea79SLois Curfman McInnes     while (its--) {
894*8965ea79SLois Curfman McInnes       for ( i=m-1; i>-1; i-- ) {
895*8965ea79SLois Curfman McInnes         n    = A->i[i+1] - A->i[i];
896*8965ea79SLois Curfman McInnes         idx  = A->j + A->i[i] + shift;
897*8965ea79SLois Curfman McInnes         v    = A->a + A->i[i] + shift;
898*8965ea79SLois Curfman McInnes         sum  = b[i];
899*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,xs,v,idx,n);
900*8965ea79SLois Curfman McInnes         d    = fshift + A->a[diag[i]+shift];
901*8965ea79SLois Curfman McInnes         n    = B->i[i+1] - B->i[i];
902*8965ea79SLois Curfman McInnes         idx  = B->j + B->i[i] + shift;
903*8965ea79SLois Curfman McInnes         v    = B->a + B->i[i] + shift;
904*8965ea79SLois Curfman McInnes         SPARSEDENSEMDOT(sum,ls,v,idx,n);
905*8965ea79SLois Curfman McInnes         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
906*8965ea79SLois Curfman McInnes       }
907*8965ea79SLois Curfman McInnes     }
908*8965ea79SLois Curfman McInnes   }
909*8965ea79SLois Curfman McInnes   return 0;
910*8965ea79SLois Curfman McInnes }
911*8965ea79SLois Curfman McInnes 
912*8965ea79SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
913*8965ea79SLois Curfman McInnes                              int *nzalloc,int *mem)
914*8965ea79SLois Curfman McInnes {
915*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
916*8965ea79SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
917*8965ea79SLois Curfman McInnes   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
918*8965ea79SLois Curfman McInnes 
919*8965ea79SLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
920*8965ea79SLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
921*8965ea79SLois Curfman McInnes   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
922*8965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
923*8965ea79SLois Curfman McInnes     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
924*8965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
925*8965ea79SLois Curfman McInnes     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
926*8965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
927*8965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
928*8965ea79SLois Curfman McInnes     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
929*8965ea79SLois Curfman McInnes     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
930*8965ea79SLois Curfman McInnes   }
931*8965ea79SLois Curfman McInnes   return 0;
932*8965ea79SLois Curfman McInnes }
933*8965ea79SLois Curfman McInnes 
934*8965ea79SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
935*8965ea79SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
936*8965ea79SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
937*8965ea79SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
938*8965ea79SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
939*8965ea79SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
940*8965ea79SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
941*8965ea79SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
942*8965ea79SLois Curfman McInnes 
943*8965ea79SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat A,MatOption op)
944*8965ea79SLois Curfman McInnes {
945*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
946*8965ea79SLois Curfman McInnes 
947*8965ea79SLois Curfman McInnes   if (op == NO_NEW_NONZERO_LOCATIONS ||
948*8965ea79SLois Curfman McInnes       op == YES_NEW_NONZERO_LOCATIONS ||
949*8965ea79SLois Curfman McInnes       op == COLUMNS_SORTED ||
950*8965ea79SLois Curfman McInnes       op == ROW_ORIENTED) {
951*8965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
952*8965ea79SLois Curfman McInnes         MatSetOption(a->B,op);
953*8965ea79SLois Curfman McInnes   }
954*8965ea79SLois Curfman McInnes   else if (op == ROWS_SORTED ||
955*8965ea79SLois Curfman McInnes            op == SYMMETRIC_MATRIX ||
956*8965ea79SLois Curfman McInnes            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
957*8965ea79SLois Curfman McInnes            op == YES_NEW_DIAGONALS)
958*8965ea79SLois Curfman McInnes     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
959*8965ea79SLois Curfman McInnes   else if (op == COLUMN_ORIENTED)
960*8965ea79SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
961*8965ea79SLois Curfman McInnes   else if (op == NO_NEW_DIAGONALS)
962*8965ea79SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
963*8965ea79SLois Curfman McInnes   else
964*8965ea79SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
965*8965ea79SLois Curfman McInnes   return 0;
966*8965ea79SLois Curfman McInnes }
967*8965ea79SLois Curfman McInnes 
968*8965ea79SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
969*8965ea79SLois Curfman McInnes {
970*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
971*8965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
972*8965ea79SLois Curfman McInnes   return 0;
973*8965ea79SLois Curfman McInnes }
974*8965ea79SLois Curfman McInnes 
975*8965ea79SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
976*8965ea79SLois Curfman McInnes {
977*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
978*8965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
979*8965ea79SLois Curfman McInnes   return 0;
980*8965ea79SLois Curfman McInnes }
981*8965ea79SLois Curfman McInnes 
982*8965ea79SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
983*8965ea79SLois Curfman McInnes {
984*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
985*8965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
986*8965ea79SLois Curfman McInnes   return 0;
987*8965ea79SLois Curfman McInnes }
988*8965ea79SLois Curfman McInnes 
989*8965ea79SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
990*8965ea79SLois Curfman McInnes {
991*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
992*8965ea79SLois Curfman McInnes   Scalar     *vworkA, *vworkB, **pvA, **pvB;
993*8965ea79SLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
994*8965ea79SLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
995*8965ea79SLois Curfman McInnes 
996*8965ea79SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows")
997*8965ea79SLois Curfman McInnes   lrow = row - rstart;
998*8965ea79SLois Curfman McInnes 
999*8965ea79SLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1000*8965ea79SLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1001*8965ea79SLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1002*8965ea79SLois Curfman McInnes   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1003*8965ea79SLois Curfman McInnes   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1004*8965ea79SLois Curfman McInnes   nztot = nzA + nzB;
1005*8965ea79SLois Curfman McInnes 
1006*8965ea79SLois Curfman McInnes   if (v  || idx) {
1007*8965ea79SLois Curfman McInnes     if (nztot) {
1008*8965ea79SLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1009*8965ea79SLois Curfman McInnes       int imark;
1010*8965ea79SLois Curfman McInnes       if (mat->assembled) {
1011*8965ea79SLois Curfman McInnes         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1012*8965ea79SLois Curfman McInnes       }
1013*8965ea79SLois Curfman McInnes       if (v) {
1014*8965ea79SLois Curfman McInnes         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1015*8965ea79SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1016*8965ea79SLois Curfman McInnes           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1017*8965ea79SLois Curfman McInnes           else break;
1018*8965ea79SLois Curfman McInnes         }
1019*8965ea79SLois Curfman McInnes         imark = i;
1020*8965ea79SLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1021*8965ea79SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1022*8965ea79SLois Curfman McInnes       }
1023*8965ea79SLois Curfman McInnes       if (idx) {
1024*8965ea79SLois Curfman McInnes         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1025*8965ea79SLois Curfman McInnes         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1026*8965ea79SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
1027*8965ea79SLois Curfman McInnes           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1028*8965ea79SLois Curfman McInnes           else break;
1029*8965ea79SLois Curfman McInnes         }
1030*8965ea79SLois Curfman McInnes         imark = i;
1031*8965ea79SLois Curfman McInnes         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1032*8965ea79SLois Curfman McInnes         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1033*8965ea79SLois Curfman McInnes       }
1034*8965ea79SLois Curfman McInnes     }
1035*8965ea79SLois Curfman McInnes     else {*idx = 0; *v=0;}
1036*8965ea79SLois Curfman McInnes   }
1037*8965ea79SLois Curfman McInnes   *nz = nztot;
1038*8965ea79SLois Curfman McInnes   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1039*8965ea79SLois Curfman McInnes   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1040*8965ea79SLois Curfman McInnes   return 0;
1041*8965ea79SLois Curfman McInnes }
1042*8965ea79SLois Curfman McInnes 
1043*8965ea79SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1044*8965ea79SLois Curfman McInnes {
1045*8965ea79SLois Curfman McInnes   if (idx) PETSCFREE(*idx);
1046*8965ea79SLois Curfman McInnes   if (v) PETSCFREE(*v);
1047*8965ea79SLois Curfman McInnes   return 0;
1048*8965ea79SLois Curfman McInnes }
1049*8965ea79SLois Curfman McInnes 
1050*8965ea79SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1051*8965ea79SLois Curfman McInnes {
1052*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1053*8965ea79SLois Curfman McInnes   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1054*8965ea79SLois Curfman McInnes   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1055*8965ea79SLois Curfman McInnes   double     sum = 0.0;
1056*8965ea79SLois Curfman McInnes   Scalar     *v;
1057*8965ea79SLois Curfman McInnes 
1058*8965ea79SLois Curfman McInnes   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1059*8965ea79SLois Curfman McInnes   if (aij->size == 1) {
1060*8965ea79SLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1061*8965ea79SLois Curfman McInnes   } else {
1062*8965ea79SLois Curfman McInnes     if (type == NORM_FROBENIUS) {
1063*8965ea79SLois Curfman McInnes       v = amat->a;
1064*8965ea79SLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
1065*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX)
1066*8965ea79SLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
1067*8965ea79SLois Curfman McInnes #else
1068*8965ea79SLois Curfman McInnes         sum += (*v)*(*v); v++;
1069*8965ea79SLois Curfman McInnes #endif
1070*8965ea79SLois Curfman McInnes       }
1071*8965ea79SLois Curfman McInnes       v = bmat->a;
1072*8965ea79SLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
1073*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX)
1074*8965ea79SLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
1075*8965ea79SLois Curfman McInnes #else
1076*8965ea79SLois Curfman McInnes         sum += (*v)*(*v); v++;
1077*8965ea79SLois Curfman McInnes #endif
1078*8965ea79SLois Curfman McInnes       }
1079*8965ea79SLois Curfman McInnes       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1080*8965ea79SLois Curfman McInnes       *norm = sqrt(*norm);
1081*8965ea79SLois Curfman McInnes     }
1082*8965ea79SLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
1083*8965ea79SLois Curfman McInnes       double *tmp, *tmp2;
1084*8965ea79SLois Curfman McInnes       int    *jj, *garray = aij->garray;
1085*8965ea79SLois Curfman McInnes       tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1086*8965ea79SLois Curfman McInnes       tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1087*8965ea79SLois Curfman McInnes       PetscZero(tmp,aij->N*sizeof(double));
1088*8965ea79SLois Curfman McInnes       *norm = 0.0;
1089*8965ea79SLois Curfman McInnes       v = amat->a; jj = amat->j;
1090*8965ea79SLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1091*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX)
1092*8965ea79SLois Curfman McInnes         tmp[cstart + *jj++ + shift] += abs(*v++);
1093*8965ea79SLois Curfman McInnes #else
1094*8965ea79SLois Curfman McInnes         tmp[cstart + *jj++ + shift] += fabs(*v++);
1095*8965ea79SLois Curfman McInnes #endif
1096*8965ea79SLois Curfman McInnes       }
1097*8965ea79SLois Curfman McInnes       v = bmat->a; jj = bmat->j;
1098*8965ea79SLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1099*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX)
1100*8965ea79SLois Curfman McInnes         tmp[garray[*jj++ + shift]] += abs(*v++);
1101*8965ea79SLois Curfman McInnes #else
1102*8965ea79SLois Curfman McInnes         tmp[garray[*jj++ + shift]] += fabs(*v++);
1103*8965ea79SLois Curfman McInnes #endif
1104*8965ea79SLois Curfman McInnes       }
1105*8965ea79SLois Curfman McInnes       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1106*8965ea79SLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
1107*8965ea79SLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
1108*8965ea79SLois Curfman McInnes       }
1109*8965ea79SLois Curfman McInnes       PETSCFREE(tmp); PETSCFREE(tmp2);
1110*8965ea79SLois Curfman McInnes     }
1111*8965ea79SLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1112*8965ea79SLois Curfman McInnes       double normtemp = 0.0;
1113*8965ea79SLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1114*8965ea79SLois Curfman McInnes         v = amat->a + amat->i[j] + shift;
1115*8965ea79SLois Curfman McInnes         sum = 0.0;
1116*8965ea79SLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1117*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX)
1118*8965ea79SLois Curfman McInnes           sum += abs(*v); v++;
1119*8965ea79SLois Curfman McInnes #else
1120*8965ea79SLois Curfman McInnes           sum += fabs(*v); v++;
1121*8965ea79SLois Curfman McInnes #endif
1122*8965ea79SLois Curfman McInnes         }
1123*8965ea79SLois Curfman McInnes         v = bmat->a + bmat->i[j] + shift;
1124*8965ea79SLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1125*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX)
1126*8965ea79SLois Curfman McInnes           sum += abs(*v); v++;
1127*8965ea79SLois Curfman McInnes #else
1128*8965ea79SLois Curfman McInnes           sum += fabs(*v); v++;
1129*8965ea79SLois Curfman McInnes #endif
1130*8965ea79SLois Curfman McInnes         }
1131*8965ea79SLois Curfman McInnes         if (sum > normtemp) normtemp = sum;
1132*8965ea79SLois Curfman McInnes         MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1133*8965ea79SLois Curfman McInnes       }
1134*8965ea79SLois Curfman McInnes     }
1135*8965ea79SLois Curfman McInnes     else {
1136*8965ea79SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIRow:No support for two norm");
1137*8965ea79SLois Curfman McInnes     }
1138*8965ea79SLois Curfman McInnes   }
1139*8965ea79SLois Curfman McInnes   return 0;
1140*8965ea79SLois Curfman McInnes }
1141*8965ea79SLois Curfman McInnes 
1142*8965ea79SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1143*8965ea79SLois Curfman McInnes {
1144*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1145*8965ea79SLois Curfman McInnes   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1146*8965ea79SLois Curfman McInnes   int        ierr,shift = Aloc->indexshift;
1147*8965ea79SLois Curfman McInnes   Mat        B;
1148*8965ea79SLois Curfman McInnes   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1149*8965ea79SLois Curfman McInnes   Scalar     *array;
1150*8965ea79SLois Curfman McInnes 
1151*8965ea79SLois Curfman McInnes   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1152*8965ea79SLois Curfman McInnes   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1153*8965ea79SLois Curfman McInnes   CHKERRQ(ierr);
1154*8965ea79SLois Curfman McInnes 
1155*8965ea79SLois Curfman McInnes   /* copy over the A part */
1156*8965ea79SLois Curfman McInnes   Aloc = (Mat_SeqAIJ*) a->A->data;
1157*8965ea79SLois Curfman McInnes   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1158*8965ea79SLois Curfman McInnes   row = a->rstart;
1159*8965ea79SLois Curfman McInnes   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1160*8965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
1161*8965ea79SLois Curfman McInnes     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1162*8965ea79SLois Curfman McInnes     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1163*8965ea79SLois Curfman McInnes   }
1164*8965ea79SLois Curfman McInnes   aj = Aloc->j;
1165*8965ea79SLois Curfman McInnes   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1166*8965ea79SLois Curfman McInnes 
1167*8965ea79SLois Curfman McInnes   /* copy over the B part */
1168*8965ea79SLois Curfman McInnes   Aloc = (Mat_SeqAIJ*) a->B->data;
1169*8965ea79SLois Curfman McInnes   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1170*8965ea79SLois Curfman McInnes   row = a->rstart;
1171*8965ea79SLois Curfman McInnes   ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1172*8965ea79SLois Curfman McInnes   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1173*8965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
1174*8965ea79SLois Curfman McInnes     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1175*8965ea79SLois Curfman McInnes     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1176*8965ea79SLois Curfman McInnes   }
1177*8965ea79SLois Curfman McInnes   PETSCFREE(ct);
1178*8965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1179*8965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1180*8965ea79SLois Curfman McInnes   if (matout) {
1181*8965ea79SLois Curfman McInnes     *matout = B;
1182*8965ea79SLois Curfman McInnes   } else {
1183*8965ea79SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
1184*8965ea79SLois Curfman McInnes     PETSCFREE(a->rowners);
1185*8965ea79SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1186*8965ea79SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1187*8965ea79SLois Curfman McInnes     if (a->colmap) PETSCFREE(a->colmap);
1188*8965ea79SLois Curfman McInnes     if (a->garray) PETSCFREE(a->garray);
1189*8965ea79SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1190*8965ea79SLois Curfman McInnes     if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
1191*8965ea79SLois Curfman McInnes     PETSCFREE(a);
1192*8965ea79SLois Curfman McInnes     PetscMemcpy(A,B,sizeof(struct _Mat));
1193*8965ea79SLois Curfman McInnes     PETSCHEADERDESTROY(B);
1194*8965ea79SLois Curfman McInnes   }
1195*8965ea79SLois Curfman McInnes   return 0;
1196*8965ea79SLois Curfman McInnes }
1197*8965ea79SLois Curfman McInnes 
1198*8965ea79SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1199*8965ea79SLois Curfman McInnes static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1200*8965ea79SLois Curfman McInnes 
1201*8965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
1202*8965ea79SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1203*8965ea79SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1204*8965ea79SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1205*8965ea79SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1206*8965ea79SLois Curfman McInnes        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1207*8965ea79SLois Curfman McInnes        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1208*8965ea79SLois Curfman McInnes        MatLUFactor_MPIAIJ,0,
1209*8965ea79SLois Curfman McInnes        MatRelax_MPIAIJ,
1210*8965ea79SLois Curfman McInnes        MatTranspose_MPIAIJ,
1211*8965ea79SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1212*8965ea79SLois Curfman McInnes        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1213*8965ea79SLois Curfman McInnes        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1214*8965ea79SLois Curfman McInnes        0,
1215*8965ea79SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1216*8965ea79SLois Curfman McInnes        MatGetReordering_MPIAIJ,
1217*8965ea79SLois Curfman McInnes        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1218*8965ea79SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1219*8965ea79SLois Curfman McInnes        MatILUFactorSymbolic_MPIAIJ,0,
1220*8965ea79SLois Curfman McInnes        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1221*8965ea79SLois Curfman McInnes 
1222*8965ea79SLois Curfman McInnes /*@C
1223*8965ea79SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1224*8965ea79SLois Curfman McInnes    (the default parallel PETSc format).
1225*8965ea79SLois Curfman McInnes 
1226*8965ea79SLois Curfman McInnes    Input Parameters:
1227*8965ea79SLois Curfman McInnes .  comm - MPI communicator
1228*8965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1229*8965ea79SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated
1230*8965ea79SLois Curfman McInnes            if N is given)
1231*8965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1232*8965ea79SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated
1233*8965ea79SLois Curfman McInnes            if n is given)
1234*8965ea79SLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1235*8965ea79SLois Curfman McInnes            (same for all local rows)
1236*8965ea79SLois Curfman McInnes .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1237*8965ea79SLois Curfman McInnes            or null (possibly different for each row).  You must leave room
1238*8965ea79SLois Curfman McInnes            for the diagonal entry even if it is zero.
1239*8965ea79SLois Curfman McInnes .  o_nz - number of nonzeros per row in off-diagonal portion of local
1240*8965ea79SLois Curfman McInnes            submatrix (same for all local rows).
1241*8965ea79SLois Curfman McInnes .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1242*8965ea79SLois Curfman McInnes            submatrix or null (possibly different for each row).
1243*8965ea79SLois Curfman McInnes 
1244*8965ea79SLois Curfman McInnes    Output Parameter:
1245*8965ea79SLois Curfman McInnes .  newmat - the matrix
1246*8965ea79SLois Curfman McInnes 
1247*8965ea79SLois Curfman McInnes    Notes:
1248*8965ea79SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1249*8965ea79SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
1250*8965ea79SLois Curfman McInnes    storage.  That is, the stored row and column indices begin at
1251*8965ea79SLois Curfman McInnes    one, not zero.  See the users manual for further details.
1252*8965ea79SLois Curfman McInnes 
1253*8965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1254*8965ea79SLois Curfman McInnes    (possibly both).
1255*8965ea79SLois Curfman McInnes 
1256*8965ea79SLois Curfman McInnes    Storage Information:
1257*8965ea79SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1258*8965ea79SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1259*8965ea79SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1260*8965ea79SLois Curfman McInnes    local matrix (a rectangular submatrix).
1261*8965ea79SLois Curfman McInnes 
1262*8965ea79SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
1263*8965ea79SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both). Set both
1264*8965ea79SLois Curfman McInnes    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1265*8965ea79SLois Curfman McInnes    Likewise, specify preallocated storage for the off-diagonal part of
1266*8965ea79SLois Curfman McInnes    the local submatrix with o_nz or o_nnz (not both).
1267*8965ea79SLois Curfman McInnes 
1268*8965ea79SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1269*8965ea79SLois Curfman McInnes 
1270*8965ea79SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1271*8965ea79SLois Curfman McInnes @*/
1272*8965ea79SLois Curfman McInnes int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1273*8965ea79SLois Curfman McInnes                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1274*8965ea79SLois Curfman McInnes {
1275*8965ea79SLois Curfman McInnes   Mat          mat;
1276*8965ea79SLois Curfman McInnes   Mat_MPIAIJ   *a;
1277*8965ea79SLois Curfman McInnes   int          ierr, i,sum[2],work[2];
1278*8965ea79SLois Curfman McInnes 
1279*8965ea79SLois Curfman McInnes   *newmat         = 0;
1280*8965ea79SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1281*8965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
1282*8965ea79SLois Curfman McInnes   mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1283*8965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1284*8965ea79SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
1285*8965ea79SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1286*8965ea79SLois Curfman McInnes   mat->factor     = 0;
1287*8965ea79SLois Curfman McInnes 
1288*8965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1289*8965ea79SLois Curfman McInnes   MPI_Comm_rank(comm,&a->rank);
1290*8965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&a->size);
1291*8965ea79SLois Curfman McInnes 
1292*8965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE && (d_nnz || o_nnz))
1293*8965ea79SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1294*8965ea79SLois Curfman McInnes 
1295*8965ea79SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1296*8965ea79SLois Curfman McInnes     work[0] = m; work[1] = n;
1297*8965ea79SLois Curfman McInnes     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1298*8965ea79SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1299*8965ea79SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
1300*8965ea79SLois Curfman McInnes   }
1301*8965ea79SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1302*8965ea79SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1303*8965ea79SLois Curfman McInnes   a->m = m;
1304*8965ea79SLois Curfman McInnes   a->n = n;
1305*8965ea79SLois Curfman McInnes   a->N = N;
1306*8965ea79SLois Curfman McInnes   a->M = M;
1307*8965ea79SLois Curfman McInnes 
1308*8965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
1309*8965ea79SLois Curfman McInnes   a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1310*8965ea79SLois Curfman McInnes   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
1311*8965ea79SLois Curfman McInnes                        sizeof(Mat_MPIAIJ));
1312*8965ea79SLois Curfman McInnes   a->cowners = a->rowners + a->size + 1;
1313*8965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1314*8965ea79SLois Curfman McInnes   a->rowners[0] = 0;
1315*8965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1316*8965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
1317*8965ea79SLois Curfman McInnes   }
1318*8965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
1319*8965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1320*8965ea79SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1321*8965ea79SLois Curfman McInnes   a->cowners[0] = 0;
1322*8965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
1323*8965ea79SLois Curfman McInnes     a->cowners[i] += a->cowners[i-1];
1324*8965ea79SLois Curfman McInnes   }
1325*8965ea79SLois Curfman McInnes   a->cstart = a->cowners[a->rank];
1326*8965ea79SLois Curfman McInnes   a->cend   = a->cowners[a->rank+1];
1327*8965ea79SLois Curfman McInnes 
1328*8965ea79SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1329*8965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
1330*8965ea79SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1331*8965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->B);
1332*8965ea79SLois Curfman McInnes 
1333*8965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
1334*8965ea79SLois Curfman McInnes   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1335*8965ea79SLois Curfman McInnes   a->colmap    = 0;
1336*8965ea79SLois Curfman McInnes   a->garray    = 0;
1337*8965ea79SLois Curfman McInnes 
1338*8965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
1339*8965ea79SLois Curfman McInnes   a->lvec      = 0;
1340*8965ea79SLois Curfman McInnes   a->Mvctx     = 0;
1341*8965ea79SLois Curfman McInnes   a->assembled = 0;
1342*8965ea79SLois Curfman McInnes 
1343*8965ea79SLois Curfman McInnes   *newmat = mat;
1344*8965ea79SLois Curfman McInnes   return 0;
1345*8965ea79SLois Curfman McInnes }
1346*8965ea79SLois Curfman McInnes 
1347*8965ea79SLois Curfman McInnes static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1348*8965ea79SLois Curfman McInnes {
1349*8965ea79SLois Curfman McInnes   Mat        mat;
1350*8965ea79SLois Curfman McInnes   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1351*8965ea79SLois Curfman McInnes   int        ierr, len;
1352*8965ea79SLois Curfman McInnes 
1353*8965ea79SLois Curfman McInnes   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1354*8965ea79SLois Curfman McInnes   *newmat       = 0;
1355*8965ea79SLois Curfman McInnes   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1356*8965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
1357*8965ea79SLois Curfman McInnes   mat->data     = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1358*8965ea79SLois Curfman McInnes   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1359*8965ea79SLois Curfman McInnes   mat->destroy  = MatDestroy_MPIAIJ;
1360*8965ea79SLois Curfman McInnes   mat->view     = MatView_MPIAIJ;
1361*8965ea79SLois Curfman McInnes   mat->factor   = matin->factor;
1362*8965ea79SLois Curfman McInnes 
1363*8965ea79SLois Curfman McInnes   a->m          = oldmat->m;
1364*8965ea79SLois Curfman McInnes   a->n          = oldmat->n;
1365*8965ea79SLois Curfman McInnes   a->M          = oldmat->M;
1366*8965ea79SLois Curfman McInnes   a->N          = oldmat->N;
1367*8965ea79SLois Curfman McInnes 
1368*8965ea79SLois Curfman McInnes   a->assembled  = 1;
1369*8965ea79SLois Curfman McInnes   a->rstart     = oldmat->rstart;
1370*8965ea79SLois Curfman McInnes   a->rend       = oldmat->rend;
1371*8965ea79SLois Curfman McInnes   a->cstart     = oldmat->cstart;
1372*8965ea79SLois Curfman McInnes   a->cend       = oldmat->cend;
1373*8965ea79SLois Curfman McInnes   a->size       = oldmat->size;
1374*8965ea79SLois Curfman McInnes   a->rank       = oldmat->rank;
1375*8965ea79SLois Curfman McInnes   a->insertmode = NOT_SET_VALUES;
1376*8965ea79SLois Curfman McInnes 
1377*8965ea79SLois Curfman McInnes   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1378*8965ea79SLois Curfman McInnes   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1379*8965ea79SLois Curfman McInnes   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1380*8965ea79SLois Curfman McInnes   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1381*8965ea79SLois Curfman McInnes   if (oldmat->colmap) {
1382*8965ea79SLois Curfman McInnes     a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1383*8965ea79SLois Curfman McInnes     PLogObjectMemory(mat,(a->N)*sizeof(int));
1384*8965ea79SLois Curfman McInnes     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1385*8965ea79SLois Curfman McInnes   } else a->colmap = 0;
1386*8965ea79SLois Curfman McInnes   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1387*8965ea79SLois Curfman McInnes     a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray);
1388*8965ea79SLois Curfman McInnes     PLogObjectMemory(mat,len*sizeof(int));
1389*8965ea79SLois Curfman McInnes     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1390*8965ea79SLois Curfman McInnes   } else a->garray = 0;
1391*8965ea79SLois Curfman McInnes 
1392*8965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1393*8965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
1394*8965ea79SLois Curfman McInnes   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1395*8965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
1396*8965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1397*8965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
1398*8965ea79SLois Curfman McInnes   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1399*8965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->B);
1400*8965ea79SLois Curfman McInnes   *newmat = mat;
1401*8965ea79SLois Curfman McInnes   return 0;
1402*8965ea79SLois Curfman McInnes }
1403*8965ea79SLois Curfman McInnes 
1404*8965ea79SLois Curfman McInnes #include "sysio.h"
1405*8965ea79SLois Curfman McInnes 
1406*8965ea79SLois Curfman McInnes int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1407*8965ea79SLois Curfman McInnes {
1408*8965ea79SLois Curfman McInnes   Mat          A;
1409*8965ea79SLois Curfman McInnes   int          i, nz, ierr, j,rstart, rend, fd;
1410*8965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
1411*8965ea79SLois Curfman McInnes   PetscObject  vobj = (PetscObject) bview;
1412*8965ea79SLois Curfman McInnes   MPI_Comm     comm = vobj->comm;
1413*8965ea79SLois Curfman McInnes   MPI_Status   status;
1414*8965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1415*8965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1416*8965ea79SLois Curfman McInnes   int          tag = ((PetscObject)bview)->tag;
1417*8965ea79SLois Curfman McInnes 
1418*8965ea79SLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1419*8965ea79SLois Curfman McInnes   if (!rank) {
1420*8965ea79SLois Curfman McInnes     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1421*8965ea79SLois Curfman McInnes     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1422*8965ea79SLois Curfman McInnes     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1423*8965ea79SLois Curfman McInnes   }
1424*8965ea79SLois Curfman McInnes 
1425*8965ea79SLois Curfman McInnes   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1426*8965ea79SLois Curfman McInnes   M = header[1]; N = header[2];
1427*8965ea79SLois Curfman McInnes   /* determine ownership of all rows */
1428*8965ea79SLois Curfman McInnes   m = M/size + ((M % size) > rank);
1429*8965ea79SLois Curfman McInnes   rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
1430*8965ea79SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1431*8965ea79SLois Curfman McInnes   rowners[0] = 0;
1432*8965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1433*8965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
1434*8965ea79SLois Curfman McInnes   }
1435*8965ea79SLois Curfman McInnes   rstart = rowners[rank];
1436*8965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
1437*8965ea79SLois Curfman McInnes 
1438*8965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
1439*8965ea79SLois Curfman McInnes   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1440*8965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
1441*8965ea79SLois Curfman McInnes   if (!rank) {
1442*8965ea79SLois Curfman McInnes     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
1443*8965ea79SLois Curfman McInnes     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1444*8965ea79SLois Curfman McInnes     sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
1445*8965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1446*8965ea79SLois Curfman McInnes     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1447*8965ea79SLois Curfman McInnes     PETSCFREE(sndcounts);
1448*8965ea79SLois Curfman McInnes   }
1449*8965ea79SLois Curfman McInnes   else {
1450*8965ea79SLois Curfman McInnes     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1451*8965ea79SLois Curfman McInnes   }
1452*8965ea79SLois Curfman McInnes 
1453*8965ea79SLois Curfman McInnes   if (!rank) {
1454*8965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
1455*8965ea79SLois Curfman McInnes     procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
1456*8965ea79SLois Curfman McInnes     PetscZero(procsnz,size*sizeof(int));
1457*8965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1458*8965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1459*8965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
1460*8965ea79SLois Curfman McInnes       }
1461*8965ea79SLois Curfman McInnes     }
1462*8965ea79SLois Curfman McInnes     PETSCFREE(rowlengths);
1463*8965ea79SLois Curfman McInnes 
1464*8965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
1465*8965ea79SLois Curfman McInnes     maxnz = 0;
1466*8965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1467*8965ea79SLois Curfman McInnes       maxnz = PETSCMAX(maxnz,procsnz[i]);
1468*8965ea79SLois Curfman McInnes     }
1469*8965ea79SLois Curfman McInnes     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
1470*8965ea79SLois Curfman McInnes 
1471*8965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
1472*8965ea79SLois Curfman McInnes     nz = procsnz[0];
1473*8965ea79SLois Curfman McInnes     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1474*8965ea79SLois Curfman McInnes     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1475*8965ea79SLois Curfman McInnes 
1476*8965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
1477*8965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1478*8965ea79SLois Curfman McInnes       nz = procsnz[i];
1479*8965ea79SLois Curfman McInnes       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1480*8965ea79SLois Curfman McInnes       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1481*8965ea79SLois Curfman McInnes     }
1482*8965ea79SLois Curfman McInnes     PETSCFREE(cols);
1483*8965ea79SLois Curfman McInnes   }
1484*8965ea79SLois Curfman McInnes   else {
1485*8965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
1486*8965ea79SLois Curfman McInnes     nz = 0;
1487*8965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
1488*8965ea79SLois Curfman McInnes       nz += ourlens[i];
1489*8965ea79SLois Curfman McInnes     }
1490*8965ea79SLois Curfman McInnes     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1491*8965ea79SLois Curfman McInnes 
1492*8965ea79SLois Curfman McInnes     /* receive message of column indices*/
1493*8965ea79SLois Curfman McInnes     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1494*8965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPI_INT,&maxnz);
1495*8965ea79SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1496*8965ea79SLois Curfman McInnes   }
1497*8965ea79SLois Curfman McInnes 
1498*8965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1499*8965ea79SLois Curfman McInnes   PetscZero(offlens,m*sizeof(int));
1500*8965ea79SLois Curfman McInnes   jj = 0;
1501*8965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
1502*8965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
1503*8965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1504*8965ea79SLois Curfman McInnes       jj++;
1505*8965ea79SLois Curfman McInnes     }
1506*8965ea79SLois Curfman McInnes   }
1507*8965ea79SLois Curfman McInnes 
1508*8965ea79SLois Curfman McInnes   /* create our matrix */
1509*8965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
1510*8965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
1511*8965ea79SLois Curfman McInnes   }
1512*8965ea79SLois Curfman McInnes   if (type == MATMPIAIJ) {
1513*8965ea79SLois Curfman McInnes     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1514*8965ea79SLois Curfman McInnes   }
1515*8965ea79SLois Curfman McInnes   else if (type == MATMPIROW) {
1516*8965ea79SLois Curfman McInnes     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1517*8965ea79SLois Curfman McInnes   }
1518*8965ea79SLois Curfman McInnes   A = *newmat;
1519*8965ea79SLois Curfman McInnes   MatSetOption(A,COLUMNS_SORTED);
1520*8965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
1521*8965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
1522*8965ea79SLois Curfman McInnes   }
1523*8965ea79SLois Curfman McInnes 
1524*8965ea79SLois Curfman McInnes   if (!rank) {
1525*8965ea79SLois Curfman McInnes     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1526*8965ea79SLois Curfman McInnes 
1527*8965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
1528*8965ea79SLois Curfman McInnes     nz = procsnz[0];
1529*8965ea79SLois Curfman McInnes     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1530*8965ea79SLois Curfman McInnes 
1531*8965ea79SLois Curfman McInnes     /* insert into matrix */
1532*8965ea79SLois Curfman McInnes     jj      = rstart;
1533*8965ea79SLois Curfman McInnes     smycols = mycols;
1534*8965ea79SLois Curfman McInnes     svals   = vals;
1535*8965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
1536*8965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1537*8965ea79SLois Curfman McInnes       smycols += ourlens[i];
1538*8965ea79SLois Curfman McInnes       svals   += ourlens[i];
1539*8965ea79SLois Curfman McInnes       jj++;
1540*8965ea79SLois Curfman McInnes     }
1541*8965ea79SLois Curfman McInnes 
1542*8965ea79SLois Curfman McInnes     /* read in other processors and ship out */
1543*8965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1544*8965ea79SLois Curfman McInnes       nz = procsnz[i];
1545*8965ea79SLois Curfman McInnes       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1546*8965ea79SLois Curfman McInnes       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1547*8965ea79SLois Curfman McInnes     }
1548*8965ea79SLois Curfman McInnes     PETSCFREE(procsnz);
1549*8965ea79SLois Curfman McInnes   }
1550*8965ea79SLois Curfman McInnes   else {
1551*8965ea79SLois Curfman McInnes     /* receive numeric values */
1552*8965ea79SLois Curfman McInnes     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1553*8965ea79SLois Curfman McInnes 
1554*8965ea79SLois Curfman McInnes     /* receive message of values*/
1555*8965ea79SLois Curfman McInnes     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1556*8965ea79SLois Curfman McInnes     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1557*8965ea79SLois Curfman McInnes     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1558*8965ea79SLois Curfman McInnes 
1559*8965ea79SLois Curfman McInnes     /* insert into matrix */
1560*8965ea79SLois Curfman McInnes     jj      = rstart;
1561*8965ea79SLois Curfman McInnes     smycols = mycols;
1562*8965ea79SLois Curfman McInnes     svals   = vals;
1563*8965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
1564*8965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1565*8965ea79SLois Curfman McInnes       smycols += ourlens[i];
1566*8965ea79SLois Curfman McInnes       svals   += ourlens[i];
1567*8965ea79SLois Curfman McInnes       jj++;
1568*8965ea79SLois Curfman McInnes     }
1569*8965ea79SLois Curfman McInnes   }
1570*8965ea79SLois Curfman McInnes   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
1571*8965ea79SLois Curfman McInnes 
1572*8965ea79SLois Curfman McInnes   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1573*8965ea79SLois Curfman McInnes   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1574*8965ea79SLois Curfman McInnes   return 0;
1575*8965ea79SLois Curfman McInnes }
1576