xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d6dfbf8f6aa26f18ad3ebf3c96fa582aa9714225)
18a729477SBarry Smith 
21eb62cbbSBarry Smith #include "mpiaij.h"
38a729477SBarry Smith #include "vec/vecimpl.h"
4*d6dfbf8fSBarry Smith #include "inline/spops.h"
58a729477SBarry Smith 
61eb62cbbSBarry Smith #define CHUNCKSIZE   100
71eb62cbbSBarry Smith /*
81eb62cbbSBarry Smith    This is a simple minded stash. Do a linear search to determine if
91eb62cbbSBarry Smith  in stash, if not add to end.
101eb62cbbSBarry Smith */
111eb62cbbSBarry Smith static int StashValues(Stash *stash,int row,int n, int *idxn,
121eb62cbbSBarry Smith                        Scalar *values,InsertMode addv)
138a729477SBarry Smith {
141eb62cbbSBarry Smith   int    i,j,N = stash->n,found,*n_idx, *n_idy;
151eb62cbbSBarry Smith   Scalar val,*n_array;
168a729477SBarry Smith 
171eb62cbbSBarry Smith   for ( i=0; i<n; i++ ) {
181eb62cbbSBarry Smith     found = 0;
191eb62cbbSBarry Smith     val = *values++;
208a729477SBarry Smith     for ( j=0; j<N; j++ ) {
211eb62cbbSBarry Smith       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
221eb62cbbSBarry Smith         /* found a match */
231eb62cbbSBarry Smith         if (addv == AddValues) stash->array[j] += val;
241eb62cbbSBarry Smith         else stash->array[j] = val;
251eb62cbbSBarry Smith         found = 1;
268a729477SBarry Smith         break;
278a729477SBarry Smith       }
288a729477SBarry Smith     }
291eb62cbbSBarry Smith     if (!found) { /* not found so add to end */
301eb62cbbSBarry Smith       if ( stash->n == stash->nmax ) {
311eb62cbbSBarry Smith         /* allocate a larger stash */
321eb62cbbSBarry Smith         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
331eb62cbbSBarry Smith                                      2*sizeof(int) + sizeof(Scalar)));
341eb62cbbSBarry Smith         CHKPTR(n_array);
351eb62cbbSBarry Smith         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
361eb62cbbSBarry Smith         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
371eb62cbbSBarry Smith         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
381eb62cbbSBarry Smith         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
391eb62cbbSBarry Smith         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
401eb62cbbSBarry Smith         if (stash->array) FREE(stash->array);
411eb62cbbSBarry Smith         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
421eb62cbbSBarry Smith         stash->nmax += CHUNCKSIZE;
431eb62cbbSBarry Smith       }
441eb62cbbSBarry Smith       stash->array[stash->n]   = val;
451eb62cbbSBarry Smith       stash->idx[stash->n]     = row;
461eb62cbbSBarry Smith       stash->idy[stash->n++]   = idxn[i];
471eb62cbbSBarry Smith     }
488a729477SBarry Smith   }
498a729477SBarry Smith   return 0;
508a729477SBarry Smith }
518a729477SBarry Smith 
521eb62cbbSBarry Smith static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
531eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
548a729477SBarry Smith {
551eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
561eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
571eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
588a729477SBarry Smith 
591eb62cbbSBarry Smith   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
601eb62cbbSBarry Smith     SETERR(1,"You cannot mix inserts and adds");
618a729477SBarry Smith   }
621eb62cbbSBarry Smith   aij->insertmode = addv;
638a729477SBarry Smith   for ( i=0; i<m; i++ ) {
64*d6dfbf8fSBarry Smith     if (idxm[i] < 0) continue;
651eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
661eb62cbbSBarry Smith       row = idxm[i] - rstart;
671eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
68*d6dfbf8fSBarry Smith         if (idxn[j] < 0) continue;
691eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
701eb62cbbSBarry Smith           col = idxn[j] - cstart;
711eb62cbbSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
721eb62cbbSBarry Smith         }
731eb62cbbSBarry Smith         else {
74*d6dfbf8fSBarry Smith           if (aij->assembled) {
75*d6dfbf8fSBarry Smith             SETERR(1,"Cannot insert off diagonal block in already\
76*d6dfbf8fSBarry Smith                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
77*d6dfbf8fSBarry Smith                      if your need this feature");
78*d6dfbf8fSBarry Smith           }
791eb62cbbSBarry Smith           col = idxn[j];
801eb62cbbSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
811eb62cbbSBarry Smith         }
821eb62cbbSBarry Smith       }
831eb62cbbSBarry Smith     }
841eb62cbbSBarry Smith     else {
851eb62cbbSBarry Smith       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
861eb62cbbSBarry Smith     }
878a729477SBarry Smith   }
888a729477SBarry Smith   return 0;
898a729477SBarry Smith }
908a729477SBarry Smith 
918a729477SBarry Smith /*
921eb62cbbSBarry Smith     the assembly code is alot like the code for vectors, we should
931eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
941eb62cbbSBarry Smith     either case.
958a729477SBarry Smith */
968a729477SBarry Smith 
971eb62cbbSBarry Smith static int MatiAIJBeginAssemble(Mat mat)
988a729477SBarry Smith {
991eb62cbbSBarry Smith   Matimpiaij  *aij = (Matimpiaij *) mat->data;
100*d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
1011eb62cbbSBarry Smith   int         ierr, numtids = aij->numtids, *owners = aij->rowners;
1021eb62cbbSBarry Smith   int         mytid = aij->mytid;
1031eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1041eb62cbbSBarry Smith   int         *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work;
1051eb62cbbSBarry Smith   int         tag = 50, *owner,*starts,count;
1061eb62cbbSBarry Smith   InsertMode  addv;
1071eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1081eb62cbbSBarry Smith 
1091eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
1101eb62cbbSBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT,
1111eb62cbbSBarry Smith                 MPI_BOR,comm);
1121eb62cbbSBarry Smith   if (addv == (AddValues|InsertValues)) {
1131eb62cbbSBarry Smith     SETERR(1,"Some processors have inserted while others have added");
1141eb62cbbSBarry Smith   }
1151eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1161eb62cbbSBarry Smith 
1171eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1181eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
1191eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
1201eb62cbbSBarry Smith   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
1211eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1221eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1231eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1241eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1251eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1268a729477SBarry Smith       }
1278a729477SBarry Smith     }
1288a729477SBarry Smith   }
1291eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1301eb62cbbSBarry Smith 
1311eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1321eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
1331eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1341eb62cbbSBarry Smith   nreceives = work[mytid];
1351eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1361eb62cbbSBarry Smith   nmax = work[mytid];
1371eb62cbbSBarry Smith   FREE(work);
1381eb62cbbSBarry Smith 
1391eb62cbbSBarry Smith   /* post receives:
1401eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1411eb62cbbSBarry Smith      (global index,value) we store the global index as a double
142*d6dfbf8fSBarry Smith      to simplify the message passing.
1431eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1441eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1451eb62cbbSBarry Smith      this is a lot of wasted space.
1461eb62cbbSBarry Smith 
1471eb62cbbSBarry Smith 
1481eb62cbbSBarry Smith        This could be done better.
1491eb62cbbSBarry Smith   */
1501eb62cbbSBarry Smith   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar));
1511eb62cbbSBarry Smith   CHKPTR(rvalues);
1521eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
1531eb62cbbSBarry Smith   CHKPTR(recv_waits);
1541eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
1551eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
1561eb62cbbSBarry Smith               comm,recv_waits+i);
1571eb62cbbSBarry Smith   }
1581eb62cbbSBarry Smith 
1591eb62cbbSBarry Smith   /* do sends:
1601eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1611eb62cbbSBarry Smith          the ith processor
1621eb62cbbSBarry Smith   */
1631eb62cbbSBarry Smith   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
1641eb62cbbSBarry Smith   CHKPTR(svalues);
1651eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
1661eb62cbbSBarry Smith   CHKPTR(send_waits);
1671eb62cbbSBarry Smith   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
1681eb62cbbSBarry Smith   starts[0] = 0;
1691eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1701eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1711eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1721eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1731eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1741eb62cbbSBarry Smith   }
1751eb62cbbSBarry Smith   FREE(owner);
1761eb62cbbSBarry Smith   starts[0] = 0;
1771eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1781eb62cbbSBarry Smith   count = 0;
1791eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1801eb62cbbSBarry Smith     if (procs[i]) {
1811eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
1821eb62cbbSBarry Smith                 comm,send_waits+count++);
1831eb62cbbSBarry Smith     }
1841eb62cbbSBarry Smith   }
1851eb62cbbSBarry Smith   FREE(starts); FREE(nprocs);
1861eb62cbbSBarry Smith 
1871eb62cbbSBarry Smith   /* Free cache space */
1881eb62cbbSBarry Smith   aij->stash.nmax = aij->stash.n = 0;
1891eb62cbbSBarry Smith   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
1901eb62cbbSBarry Smith 
1911eb62cbbSBarry Smith   aij->svalues    = svalues;       aij->rvalues = rvalues;
1921eb62cbbSBarry Smith   aij->nsends     = nsends;         aij->nrecvs = nreceives;
1931eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1941eb62cbbSBarry Smith   aij->rmax       = nmax;
1951eb62cbbSBarry Smith 
1961eb62cbbSBarry Smith   return 0;
1971eb62cbbSBarry Smith }
1981eb62cbbSBarry Smith extern int MPIAIJSetUpMultiply(Mat);
1991eb62cbbSBarry Smith 
2001eb62cbbSBarry Smith static int MatiAIJEndAssemble(Mat mat)
2011eb62cbbSBarry Smith {
2021eb62cbbSBarry Smith   int        ierr;
2031eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
2041eb62cbbSBarry Smith 
2051eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
206*d6dfbf8fSBarry Smith   int         imdex,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n;
2071eb62cbbSBarry Smith   int         row,col;
2081eb62cbbSBarry Smith   Scalar      *values,val;
2091eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2101eb62cbbSBarry Smith 
2111eb62cbbSBarry Smith   /*  wait on receives */
2121eb62cbbSBarry Smith   while (count) {
213*d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2141eb62cbbSBarry Smith     /* unpack receives into our local space */
215*d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
2161eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
2171eb62cbbSBarry Smith     n = n/3;
2181eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2191eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2201eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2211eb62cbbSBarry Smith       val = values[3*i+2];
2221eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2231eb62cbbSBarry Smith           col -= aij->cstart;
2241eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2251eb62cbbSBarry Smith       }
2261eb62cbbSBarry Smith       else {
227*d6dfbf8fSBarry Smith         if (aij->assembled) {
228*d6dfbf8fSBarry Smith           SETERR(1,"Cannot insert off diagonal block in already\
229*d6dfbf8fSBarry Smith                    assembled matrix. Contact petsc-maint@mcs.anl.gov\
230*d6dfbf8fSBarry Smith                    if your need this feature");
231*d6dfbf8fSBarry Smith         }
2321eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2331eb62cbbSBarry Smith       }
2341eb62cbbSBarry Smith     }
2351eb62cbbSBarry Smith     count--;
2361eb62cbbSBarry Smith   }
2371eb62cbbSBarry Smith   FREE(aij->recv_waits); FREE(aij->rvalues);
2381eb62cbbSBarry Smith 
2391eb62cbbSBarry Smith   /* wait on sends */
2401eb62cbbSBarry Smith   if (aij->nsends) {
2411eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
2421eb62cbbSBarry Smith     CHKPTR(send_status);
2431eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2441eb62cbbSBarry Smith     FREE(send_status);
2451eb62cbbSBarry Smith   }
2461eb62cbbSBarry Smith   FREE(aij->send_waits); FREE(aij->svalues);
2471eb62cbbSBarry Smith 
2481eb62cbbSBarry Smith   aij->insertmode = NotSetValues;
2491eb62cbbSBarry Smith   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
2501eb62cbbSBarry Smith   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
2511eb62cbbSBarry Smith 
252*d6dfbf8fSBarry Smith 
253*d6dfbf8fSBarry Smith   if (aij->assembled) {
254*d6dfbf8fSBarry Smith     /* this is because the present implimentation doesn't support
255*d6dfbf8fSBarry Smith      inserting values off the diagonal block.*/
256*d6dfbf8fSBarry Smith     return 0;
257*d6dfbf8fSBarry Smith   }
258*d6dfbf8fSBarry Smith 
2591eb62cbbSBarry Smith   ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
2601eb62cbbSBarry Smith   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
2611eb62cbbSBarry Smith   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
262*d6dfbf8fSBarry Smith 
263*d6dfbf8fSBarry Smith   aij->assembled = 1;
2648a729477SBarry Smith   return 0;
2658a729477SBarry Smith }
2668a729477SBarry Smith 
2671eb62cbbSBarry Smith static int MatiZero(Mat A)
2681eb62cbbSBarry Smith {
2691eb62cbbSBarry Smith   Matimpiaij *l = (Matimpiaij *) A->data;
2701eb62cbbSBarry Smith 
2711eb62cbbSBarry Smith   MatZeroEntries(l->A); MatZeroEntries(l->B);
2721eb62cbbSBarry Smith   return 0;
2731eb62cbbSBarry Smith }
2741eb62cbbSBarry Smith 
2751eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2761eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2771eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2781eb62cbbSBarry Smith 
2791eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2801eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2811eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2821eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2831eb62cbbSBarry Smith    routine.
2841eb62cbbSBarry Smith */
2851eb62cbbSBarry Smith static int MatiZerorows(Mat A,IS is,Scalar *diag)
2861eb62cbbSBarry Smith {
2871eb62cbbSBarry Smith   Matimpiaij     *l = (Matimpiaij *) A->data;
2881eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2891eb62cbbSBarry Smith   int            *localrows,*procs,*nprocs,j,found,idx,nsends,*work;
2901eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2911eb62cbbSBarry Smith   int            *rvalues,tag = 67,count,base,slen,n,len,*source;
292*d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
293*d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2941eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2951eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2961eb62cbbSBarry Smith   IS             istmp;
2971eb62cbbSBarry Smith 
2981eb62cbbSBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
2991eb62cbbSBarry Smith   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
3001eb62cbbSBarry Smith 
3011eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3021eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
3031eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
3041eb62cbbSBarry Smith   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
3051eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3061eb62cbbSBarry Smith     idx = rows[i];
3071eb62cbbSBarry Smith     found = 0;
3081eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
3091eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3101eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3111eb62cbbSBarry Smith       }
3121eb62cbbSBarry Smith     }
313*d6dfbf8fSBarry Smith     if (!found) SETERR(1,"Imdex out of range");
3141eb62cbbSBarry Smith   }
3151eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
3161eb62cbbSBarry Smith 
3171eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3181eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
3191eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3201eb62cbbSBarry Smith   nrecvs = work[mytid];
3211eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3221eb62cbbSBarry Smith   nmax = work[mytid];
3231eb62cbbSBarry Smith   FREE(work);
3241eb62cbbSBarry Smith 
3251eb62cbbSBarry Smith   /* post receives:   */
3261eb62cbbSBarry Smith   rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */
3271eb62cbbSBarry Smith   CHKPTR(rvalues);
3281eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
3291eb62cbbSBarry Smith   CHKPTR(recv_waits);
3301eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3311eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3321eb62cbbSBarry Smith               comm,recv_waits+i);
3331eb62cbbSBarry Smith   }
3341eb62cbbSBarry Smith 
3351eb62cbbSBarry Smith   /* do sends:
3361eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3371eb62cbbSBarry Smith          the ith processor
3381eb62cbbSBarry Smith   */
3391eb62cbbSBarry Smith   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
3401eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
3411eb62cbbSBarry Smith   CHKPTR(send_waits);
3421eb62cbbSBarry Smith   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
3431eb62cbbSBarry Smith   starts[0] = 0;
3441eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3451eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3461eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3471eb62cbbSBarry Smith   }
3481eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3491eb62cbbSBarry Smith 
3501eb62cbbSBarry Smith   starts[0] = 0;
3511eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3521eb62cbbSBarry Smith   count = 0;
3531eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3541eb62cbbSBarry Smith     if (procs[i]) {
3551eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3561eb62cbbSBarry Smith                 comm,send_waits+count++);
3571eb62cbbSBarry Smith     }
3581eb62cbbSBarry Smith   }
3591eb62cbbSBarry Smith   FREE(starts);
3601eb62cbbSBarry Smith 
3611eb62cbbSBarry Smith   base = owners[mytid];
3621eb62cbbSBarry Smith 
3631eb62cbbSBarry Smith   /*  wait on receives */
3641eb62cbbSBarry Smith   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
3651eb62cbbSBarry Smith   source = lens + nrecvs;
3661eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3671eb62cbbSBarry Smith   while (count) {
368*d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3691eb62cbbSBarry Smith     /* unpack receives into our local space */
3701eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
371*d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
372*d6dfbf8fSBarry Smith     lens[imdex]  = n;
3731eb62cbbSBarry Smith     slen += n;
3741eb62cbbSBarry Smith     count--;
3751eb62cbbSBarry Smith   }
3761eb62cbbSBarry Smith   FREE(recv_waits);
3771eb62cbbSBarry Smith 
3781eb62cbbSBarry Smith   /* move the data into the send scatter */
3791eb62cbbSBarry Smith   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
3801eb62cbbSBarry Smith   count = 0;
3811eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3821eb62cbbSBarry Smith     values = rvalues + i*nmax;
3831eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3841eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3851eb62cbbSBarry Smith     }
3861eb62cbbSBarry Smith   }
3871eb62cbbSBarry Smith   FREE(rvalues); FREE(lens);
3881eb62cbbSBarry Smith   FREE(owner); FREE(nprocs);
3891eb62cbbSBarry Smith 
3901eb62cbbSBarry Smith   /* actually zap the local rows */
3911eb62cbbSBarry Smith   ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
3921eb62cbbSBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
3931eb62cbbSBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
3941eb62cbbSBarry Smith   ierr = ISDestroy(istmp); CHKERR(ierr);
3951eb62cbbSBarry Smith 
3961eb62cbbSBarry Smith   /* wait on sends */
3971eb62cbbSBarry Smith   if (nsends) {
3981eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
3991eb62cbbSBarry Smith     CHKPTR(send_status);
4001eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4011eb62cbbSBarry Smith     FREE(send_status);
4021eb62cbbSBarry Smith   }
4031eb62cbbSBarry Smith   FREE(send_waits); FREE(svalues);
4041eb62cbbSBarry Smith 
4051eb62cbbSBarry Smith 
4061eb62cbbSBarry Smith   return 0;
4071eb62cbbSBarry Smith }
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
4101eb62cbbSBarry Smith {
4111eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
4121eb62cbbSBarry Smith   int        ierr;
4131eb62cbbSBarry Smith 
414*d6dfbf8fSBarry Smith   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
4151eb62cbbSBarry Smith   CHKERR(ierr);
4161eb62cbbSBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
417*d6dfbf8fSBarry Smith   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
418*d6dfbf8fSBarry Smith   CHKERR(ierr);
4191eb62cbbSBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
4201eb62cbbSBarry Smith   return 0;
4211eb62cbbSBarry Smith }
4221eb62cbbSBarry Smith 
4231eb62cbbSBarry Smith /*
4241eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4251eb62cbbSBarry Smith    diagonal block
4261eb62cbbSBarry Smith */
4271eb62cbbSBarry Smith static int MatiAIJgetdiag(Mat Ain,Vec v)
4281eb62cbbSBarry Smith {
4291eb62cbbSBarry Smith   Matimpiaij *A = (Matimpiaij *) Ain->data;
4301eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4311eb62cbbSBarry Smith }
4321eb62cbbSBarry Smith 
4331eb62cbbSBarry Smith static int MatiAIJdestroy(PetscObject obj)
4341eb62cbbSBarry Smith {
4351eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
4361eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
4371eb62cbbSBarry Smith   int        ierr;
4381eb62cbbSBarry Smith   FREE(aij->rowners);
4391eb62cbbSBarry Smith   ierr = MatDestroy(aij->A); CHKERR(ierr);
4401eb62cbbSBarry Smith   ierr = MatDestroy(aij->B); CHKERR(ierr);
4411eb62cbbSBarry Smith   FREE(aij); FREE(mat);
4421eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
4431eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
4441eb62cbbSBarry Smith   return 0;
4451eb62cbbSBarry Smith }
4461eb62cbbSBarry Smith 
4471eb62cbbSBarry Smith static int MatiView(PetscObject obj,Viewer viewer)
4481eb62cbbSBarry Smith {
4491eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
4501eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
4511eb62cbbSBarry Smith   int        ierr;
4521eb62cbbSBarry Smith 
453*d6dfbf8fSBarry Smith   MPE_Seq_begin(mat->comm,1);
4541eb62cbbSBarry Smith   printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
4551eb62cbbSBarry Smith           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
4561eb62cbbSBarry Smith           aij->cend);
4571eb62cbbSBarry Smith   ierr = MatView(aij->A,viewer); CHKERR(ierr);
4581eb62cbbSBarry Smith   ierr = MatView(aij->B,viewer); CHKERR(ierr);
459*d6dfbf8fSBarry Smith   MPE_Seq_end(mat->comm,1);
4601eb62cbbSBarry Smith   return 0;
4611eb62cbbSBarry Smith }
4621eb62cbbSBarry Smith 
463*d6dfbf8fSBarry Smith extern int MatiAIJmarkdiag(Matiaij  *);
4641eb62cbbSBarry Smith /*
4651eb62cbbSBarry Smith     This has to provide several versions.
4661eb62cbbSBarry Smith 
4671eb62cbbSBarry Smith      1) per sequential
4681eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
4691eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
470*d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
4711eb62cbbSBarry Smith */
472*d6dfbf8fSBarry Smith static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
4738a729477SBarry Smith                         int its,Vec xx)
4748a729477SBarry Smith {
4751eb62cbbSBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
476*d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
477*d6dfbf8fSBarry Smith   Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
478*d6dfbf8fSBarry Smith   Scalar     zero = 0.0,*b,*x,*w,*xs,*ls,d,*v,sum;
479*d6dfbf8fSBarry Smith   int        ierr,one = 1, tmp, *idx, *diag,mytid;
4808a729477SBarry Smith   int        n = mat->n, m = mat->m, i, j;
4818a729477SBarry Smith 
4828a729477SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
483*d6dfbf8fSBarry Smith     /* could be more efficient by incorporating with below */
4848a729477SBarry Smith     if (ierr = VecSet(&zero,xx)) return ierr;
4858a729477SBarry Smith   }
4861eb62cbbSBarry Smith 
487*d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
488*d6dfbf8fSBarry Smith   xs = x - 1; /* shift by one for index start of 1 */
489*d6dfbf8fSBarry Smith   ls--;;
490*d6dfbf8fSBarry Smith   if (!A->diag) {if (ierr = MatiAIJmarkdiag(A)) return ierr;}
491*d6dfbf8fSBarry Smith   diag = A->diag;
4921eb62cbbSBarry Smith 
493*d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
494*d6dfbf8fSBarry Smith     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
495*d6dfbf8fSBarry Smith     CHKERR(ierr);
496*d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
497*d6dfbf8fSBarry Smith     CHKERR(ierr);
498*d6dfbf8fSBarry Smith     while (its--) {
499*d6dfbf8fSBarry Smith       /* go down through the rows */
500*d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
501*d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
502*d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
503*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
504*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
505*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
506*d6dfbf8fSBarry Smith         sum  = b[i];
507*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
508*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
509*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
510*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
511*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
512*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
513*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
514*d6dfbf8fSBarry Smith       }
515*d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
516*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
517*d6dfbf8fSBarry Smith       /* come up through the rows */
518*d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
519*d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
520*d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
521*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
522*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
523*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
524*d6dfbf8fSBarry Smith         sum  = b[i];
525*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
526*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
527*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
528*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
529*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
530*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
531*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
532*d6dfbf8fSBarry Smith       }
533*d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
534*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
535*d6dfbf8fSBarry Smith     }
536*d6dfbf8fSBarry Smith   }
537*d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
538*d6dfbf8fSBarry Smith     while (its--) {
539*d6dfbf8fSBarry Smith       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
540*d6dfbf8fSBarry Smith       CHKERR(ierr);
541*d6dfbf8fSBarry Smith       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
542*d6dfbf8fSBarry Smith       CHKERR(ierr);
543*d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
544*d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
545*d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
546*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
547*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
548*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
549*d6dfbf8fSBarry Smith         sum  = b[i];
550*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
551*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
552*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
553*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
554*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
555*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
556*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
557*d6dfbf8fSBarry Smith       }
558*d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
559*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
560*d6dfbf8fSBarry Smith     }
561*d6dfbf8fSBarry Smith   }
562*d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
563*d6dfbf8fSBarry Smith     while (its--) {
564*d6dfbf8fSBarry Smith       ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
565*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
566*d6dfbf8fSBarry Smith       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
567*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
568*d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
569*d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
570*d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
571*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
572*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
573*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
574*d6dfbf8fSBarry Smith         sum  = b[i];
575*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
576*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
577*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
578*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
579*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
580*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
581*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
582*d6dfbf8fSBarry Smith       }
583*d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
584*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
585*d6dfbf8fSBarry Smith     }
586*d6dfbf8fSBarry Smith   }
587*d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
588*d6dfbf8fSBarry Smith     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
589*d6dfbf8fSBarry Smith     CHKERR(ierr);
590*d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
591*d6dfbf8fSBarry Smith     CHKERR(ierr);
592*d6dfbf8fSBarry Smith     while (its--) {
593*d6dfbf8fSBarry Smith       /* go down through the rows */
594*d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
595*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
596*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
597*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
598*d6dfbf8fSBarry Smith         sum  = b[i];
599*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
600*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
601*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
602*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
603*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
604*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
605*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
606*d6dfbf8fSBarry Smith       }
607*d6dfbf8fSBarry Smith       /* come up through the rows */
608*d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
609*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
610*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
611*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
612*d6dfbf8fSBarry Smith         sum  = b[i];
613*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
614*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
615*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
616*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
617*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
618*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
619*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
620*d6dfbf8fSBarry Smith       }
621*d6dfbf8fSBarry Smith     }
622*d6dfbf8fSBarry Smith   }
623*d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
624*d6dfbf8fSBarry Smith     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
625*d6dfbf8fSBarry Smith     CHKERR(ierr);
626*d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
627*d6dfbf8fSBarry Smith     CHKERR(ierr);
628*d6dfbf8fSBarry Smith     while (its--) {
629*d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
630*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
631*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
632*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
633*d6dfbf8fSBarry Smith         sum  = b[i];
634*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
635*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
636*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
637*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
638*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
639*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
640*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
641*d6dfbf8fSBarry Smith       }
642*d6dfbf8fSBarry Smith     }
643*d6dfbf8fSBarry Smith   }
644*d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
645*d6dfbf8fSBarry Smith     ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
646*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
647*d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
648*d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
649*d6dfbf8fSBarry Smith     while (its--) {
650*d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
651*d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
652*d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
653*d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
654*d6dfbf8fSBarry Smith         sum  = b[i];
655*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
656*d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
657*d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
658*d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
659*d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
660*d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
661*d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
662*d6dfbf8fSBarry Smith       }
663*d6dfbf8fSBarry Smith     }
664*d6dfbf8fSBarry Smith   }
6658a729477SBarry Smith   return 0;
6668a729477SBarry Smith }
667c74985f6SBarry Smith static int MatiAIJinsopt(Mat aijin,int op)
668c74985f6SBarry Smith {
669c74985f6SBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
670c74985f6SBarry Smith 
671c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
672c74985f6SBarry Smith     MatSetOption(aij->A,op);
673c74985f6SBarry Smith     MatSetOption(aij->B,op);
674c74985f6SBarry Smith   }
675c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
676c74985f6SBarry Smith     MatSetOption(aij->A,op);
677c74985f6SBarry Smith     MatSetOption(aij->B,op);
678c74985f6SBarry Smith   }
679c74985f6SBarry Smith   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
680c74985f6SBarry Smith   return 0;
681c74985f6SBarry Smith }
682c74985f6SBarry Smith 
683c74985f6SBarry Smith static int MatiAIJsize(Mat matin,int *m,int *n)
684c74985f6SBarry Smith {
685c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
686c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
687c74985f6SBarry Smith   return 0;
688c74985f6SBarry Smith }
689c74985f6SBarry Smith 
690c74985f6SBarry Smith static int MatiAIJlocalsize(Mat matin,int *m,int *n)
691c74985f6SBarry Smith {
692c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
693c74985f6SBarry Smith   *m = mat->m; *n = mat->n;
694c74985f6SBarry Smith   return 0;
695c74985f6SBarry Smith }
696c74985f6SBarry Smith 
697c74985f6SBarry Smith static int MatiAIJrange(Mat matin,int *m,int *n)
698c74985f6SBarry Smith {
699c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
700c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
701c74985f6SBarry Smith   return 0;
702c74985f6SBarry Smith }
703c74985f6SBarry Smith 
704*d6dfbf8fSBarry Smith static int MatiCopy(Mat,Mat *);
705*d6dfbf8fSBarry Smith 
7068a729477SBarry Smith /* -------------------------------------------------------------------*/
7071eb62cbbSBarry Smith static struct _MatOps MatOps = {MatiAIJInsertValues,
7088a729477SBarry Smith        0, 0,
7091eb62cbbSBarry Smith        MatiAIJMult,0,0,0,
7101eb62cbbSBarry Smith        0,0,0,0,
7118a729477SBarry Smith        0,0,
7128a729477SBarry Smith        MatiAIJrelax,
7138a729477SBarry Smith        0,
7141eb62cbbSBarry Smith        0,0,0,
715*d6dfbf8fSBarry Smith        MatiCopy,
7168a729477SBarry Smith        MatiAIJgetdiag,0,0,
7171eb62cbbSBarry Smith        MatiAIJBeginAssemble,MatiAIJEndAssemble,
7181eb62cbbSBarry Smith        0,
719c74985f6SBarry Smith        MatiAIJinsopt,MatiZero,MatiZerorows,0,
720c74985f6SBarry Smith        0,0,0,0,
721c74985f6SBarry Smith        MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
7228a729477SBarry Smith 
7238a729477SBarry Smith 
7248a729477SBarry Smith 
7258a729477SBarry Smith /*@
7268a729477SBarry Smith 
7271eb62cbbSBarry Smith       MatCreateMPIAIJ - Creates a sparse parallel matrix
7281eb62cbbSBarry Smith                                  in AIJ format.
7298a729477SBarry Smith 
7308a729477SBarry Smith   Input Parameters:
7311eb62cbbSBarry Smith .   comm - MPI communicator
7321eb62cbbSBarry Smith .   m,n - number of local rows and columns (or -1 to have calculated)
7331eb62cbbSBarry Smith .   M,N - global rows and columns (or -1 to have calculated)
7341eb62cbbSBarry Smith .   d_nz - total number nonzeros in diagonal portion of matrix
7351eb62cbbSBarry Smith .   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
7368a729477SBarry Smith .           You must leave room for the diagonal entry even if it is zero.
7371eb62cbbSBarry Smith .   o_nz - total number nonzeros in off-diagonal portion of matrix
7381eb62cbbSBarry Smith .   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
7391eb62cbbSBarry Smith .           or null. You must have at least one nonzero per row.
7408a729477SBarry Smith 
7418a729477SBarry Smith   Output parameters:
7428a729477SBarry Smith .  newmat - the matrix
7438a729477SBarry Smith 
7441eb62cbbSBarry Smith   Keywords: matrix, aij, compressed row, sparse, parallel
7458a729477SBarry Smith @*/
7461eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
7471eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
7488a729477SBarry Smith {
7498a729477SBarry Smith   Mat          mat;
7501eb62cbbSBarry Smith   Matimpiaij   *aij;
7511eb62cbbSBarry Smith   int          ierr, i,rl,len,sum[2],work[2];
7528a729477SBarry Smith   *newmat         = 0;
7538a729477SBarry Smith   CREATEHEADER(mat,_Mat);
7541eb62cbbSBarry Smith   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
7558a729477SBarry Smith   mat->cookie     = MAT_COOKIE;
7568a729477SBarry Smith   mat->ops        = &MatOps;
7578a729477SBarry Smith   mat->destroy    = MatiAIJdestroy;
7581eb62cbbSBarry Smith   mat->view       = MatiView;
7591eb62cbbSBarry Smith   mat->type       = MATAIJMPI;
7608a729477SBarry Smith   mat->factor     = 0;
7618a729477SBarry Smith   mat->row        = 0;
7628a729477SBarry Smith   mat->col        = 0;
763*d6dfbf8fSBarry Smith   mat->outofrange = 0;
764*d6dfbf8fSBarry Smith   mat->Mlow       = 0;
765*d6dfbf8fSBarry Smith   mat->Mhigh      = M;
766*d6dfbf8fSBarry Smith   mat->Nlow       = 0;
767*d6dfbf8fSBarry Smith   mat->Nhigh      = N;
768*d6dfbf8fSBarry Smith 
769*d6dfbf8fSBarry Smith 
770*d6dfbf8fSBarry Smith   mat->comm       = comm;
7711eb62cbbSBarry Smith   aij->insertmode = NotSetValues;
7721eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
7731eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
7741eb62cbbSBarry Smith 
7751eb62cbbSBarry Smith   if (M == -1 || N == -1) {
7761eb62cbbSBarry Smith     work[0] = m; work[1] = n;
777*d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
7781eb62cbbSBarry Smith     if (M == -1) M = sum[0];
7791eb62cbbSBarry Smith     if (N == -1) N = sum[1];
7801eb62cbbSBarry Smith   }
7811eb62cbbSBarry Smith   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
7821eb62cbbSBarry Smith   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
7838a729477SBarry Smith   aij->m       = m;
7848a729477SBarry Smith   aij->n       = n;
7851eb62cbbSBarry Smith   aij->N       = N;
7861eb62cbbSBarry Smith   aij->M       = M;
7871eb62cbbSBarry Smith 
7881eb62cbbSBarry Smith   /* build local table of row and column ownerships */
7891eb62cbbSBarry Smith   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
7901eb62cbbSBarry Smith   CHKPTR(aij->rowners);
7911eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
7921eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
7931eb62cbbSBarry Smith   aij->rowners[0] = 0;
7941eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
7951eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
7968a729477SBarry Smith   }
7971eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
7981eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
7991eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
8001eb62cbbSBarry Smith   aij->cowners[0] = 0;
8011eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
8021eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
8038a729477SBarry Smith   }
8041eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
8051eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
8068a729477SBarry Smith 
8078a729477SBarry Smith 
8081eb62cbbSBarry Smith   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
8091eb62cbbSBarry Smith   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
8108a729477SBarry Smith 
8111eb62cbbSBarry Smith   /* build cache for off array entries formed */
8121eb62cbbSBarry Smith   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
8131eb62cbbSBarry Smith   aij->stash.n    = 0;
8141eb62cbbSBarry Smith   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
8151eb62cbbSBarry Smith                             sizeof(Scalar))); CHKPTR(aij->stash.array);
8161eb62cbbSBarry Smith   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
8171eb62cbbSBarry Smith   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
8188a729477SBarry Smith 
8191eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
8201eb62cbbSBarry Smith   aij->lvec      = 0;
8211eb62cbbSBarry Smith   aij->Mvctx     = 0;
822*d6dfbf8fSBarry Smith   aij->assembled = 0;
8238a729477SBarry Smith 
824*d6dfbf8fSBarry Smith   *newmat = mat;
825*d6dfbf8fSBarry Smith   return 0;
826*d6dfbf8fSBarry Smith }
827c74985f6SBarry Smith 
828*d6dfbf8fSBarry Smith static int MatiCopy(Mat matin,Mat *newmat)
829*d6dfbf8fSBarry Smith {
830*d6dfbf8fSBarry Smith   Mat        mat;
831*d6dfbf8fSBarry Smith   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
832*d6dfbf8fSBarry Smith   int        i,rl,len, m = oldmat->m, n = oldmat->n,ierr;
833*d6dfbf8fSBarry Smith   *newmat      = 0;
834*d6dfbf8fSBarry Smith 
835*d6dfbf8fSBarry Smith   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
836*d6dfbf8fSBarry Smith   CREATEHEADER(mat,_Mat);
837*d6dfbf8fSBarry Smith   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
838*d6dfbf8fSBarry Smith   mat->cookie     = MAT_COOKIE;
839*d6dfbf8fSBarry Smith   mat->ops        = &MatOps;
840*d6dfbf8fSBarry Smith   mat->destroy    = MatiAIJdestroy;
841*d6dfbf8fSBarry Smith   mat->view       = MatiView;
842*d6dfbf8fSBarry Smith   mat->type       = MATAIJSEQ;
843*d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
844*d6dfbf8fSBarry Smith   mat->row        = 0;
845*d6dfbf8fSBarry Smith   mat->col        = 0;
846*d6dfbf8fSBarry Smith   mat->outofrange = matin->outofrange;
847*d6dfbf8fSBarry Smith   mat->Mlow       = matin->Mlow;
848*d6dfbf8fSBarry Smith   mat->Mhigh      = matin->Mhigh;
849*d6dfbf8fSBarry Smith   mat->Nlow       = matin->Nlow;
850*d6dfbf8fSBarry Smith   mat->Nhigh      = matin->Nhigh;
851*d6dfbf8fSBarry Smith 
852*d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
853*d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
854*d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
855*d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
856*d6dfbf8fSBarry Smith 
857*d6dfbf8fSBarry Smith   aij->assembled  = 1;
858*d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
859*d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
860*d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
861*d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
862*d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
863*d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
864*d6dfbf8fSBarry Smith   aij->insertmode = NotSetValues;
865*d6dfbf8fSBarry Smith 
866*d6dfbf8fSBarry Smith   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
867*d6dfbf8fSBarry Smith   CHKPTR(aij->rowners);
868*d6dfbf8fSBarry Smith   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
869*d6dfbf8fSBarry Smith   aij->stash.nmax = 0;
870*d6dfbf8fSBarry Smith   aij->stash.n    = 0;
871*d6dfbf8fSBarry Smith   aij->stash.array= 0;
872*d6dfbf8fSBarry Smith   mat->comm       = matin->comm;
873*d6dfbf8fSBarry Smith 
874*d6dfbf8fSBarry Smith   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
875*d6dfbf8fSBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
876*d6dfbf8fSBarry Smith   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
877*d6dfbf8fSBarry Smith   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
8788a729477SBarry Smith   *newmat = mat;
8798a729477SBarry Smith   return 0;
8808a729477SBarry Smith }
881