xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision da3a660d273b912abcae7b3f88d2c9355b68b6f0)
18a729477SBarry Smith 
21eb62cbbSBarry Smith #include "mpiaij.h"
38a729477SBarry Smith #include "vec/vecimpl.h"
4d6dfbf8fSBarry Smith #include "inline/spops.h"
58a729477SBarry Smith 
61eb62cbbSBarry Smith #define CHUNCKSIZE   100
71eb62cbbSBarry Smith /*
81eb62cbbSBarry Smith    This is a simple minded stash. Do a linear search to determine if
91eb62cbbSBarry Smith  in stash, if not add to end.
101eb62cbbSBarry Smith */
111eb62cbbSBarry Smith static int StashValues(Stash *stash,int row,int n, int *idxn,
121eb62cbbSBarry Smith                        Scalar *values,InsertMode addv)
138a729477SBarry Smith {
141eb62cbbSBarry Smith   int    i,j,N = stash->n,found,*n_idx, *n_idy;
151eb62cbbSBarry Smith   Scalar val,*n_array;
168a729477SBarry Smith 
171eb62cbbSBarry Smith   for ( i=0; i<n; i++ ) {
181eb62cbbSBarry Smith     found = 0;
191eb62cbbSBarry Smith     val = *values++;
208a729477SBarry Smith     for ( j=0; j<N; j++ ) {
211eb62cbbSBarry Smith       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
221eb62cbbSBarry Smith         /* found a match */
231eb62cbbSBarry Smith         if (addv == AddValues) stash->array[j] += val;
241eb62cbbSBarry Smith         else stash->array[j] = val;
251eb62cbbSBarry Smith         found = 1;
268a729477SBarry Smith         break;
278a729477SBarry Smith       }
288a729477SBarry Smith     }
291eb62cbbSBarry Smith     if (!found) { /* not found so add to end */
301eb62cbbSBarry Smith       if ( stash->n == stash->nmax ) {
311eb62cbbSBarry Smith         /* allocate a larger stash */
321eb62cbbSBarry Smith         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
331eb62cbbSBarry Smith                                      2*sizeof(int) + sizeof(Scalar)));
341eb62cbbSBarry Smith         CHKPTR(n_array);
351eb62cbbSBarry Smith         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
361eb62cbbSBarry Smith         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
371eb62cbbSBarry Smith         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
381eb62cbbSBarry Smith         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
391eb62cbbSBarry Smith         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
401eb62cbbSBarry Smith         if (stash->array) FREE(stash->array);
411eb62cbbSBarry Smith         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
421eb62cbbSBarry Smith         stash->nmax += CHUNCKSIZE;
431eb62cbbSBarry Smith       }
441eb62cbbSBarry Smith       stash->array[stash->n]   = val;
451eb62cbbSBarry Smith       stash->idx[stash->n]     = row;
461eb62cbbSBarry Smith       stash->idy[stash->n++]   = idxn[i];
471eb62cbbSBarry Smith     }
488a729477SBarry Smith   }
498a729477SBarry Smith   return 0;
508a729477SBarry Smith }
518a729477SBarry Smith 
521eb62cbbSBarry Smith static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
531eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
548a729477SBarry Smith {
551eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
561eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
571eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
588a729477SBarry Smith 
591eb62cbbSBarry Smith   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
601eb62cbbSBarry Smith     SETERR(1,"You cannot mix inserts and adds");
618a729477SBarry Smith   }
621eb62cbbSBarry Smith   aij->insertmode = addv;
638a729477SBarry Smith   for ( i=0; i<m; i++ ) {
64*da3a660dSBarry Smith     if (idxm[i] < 0) SETERR(1,"Negative row index");
65*da3a660dSBarry Smith     if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
661eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
671eb62cbbSBarry Smith       row = idxm[i] - rstart;
681eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
69*da3a660dSBarry Smith         if (idxn[j] < 0) SETERR(1,"Negative column index");
70*da3a660dSBarry Smith         if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
711eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
721eb62cbbSBarry Smith           col = idxn[j] - cstart;
731eb62cbbSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
741eb62cbbSBarry Smith         }
751eb62cbbSBarry Smith         else {
76d6dfbf8fSBarry Smith           if (aij->assembled) {
77d6dfbf8fSBarry Smith             SETERR(1,"Cannot insert off diagonal block in already\
78d6dfbf8fSBarry Smith                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
79d6dfbf8fSBarry Smith                      if your need this feature");
80d6dfbf8fSBarry Smith           }
811eb62cbbSBarry Smith           col = idxn[j];
821eb62cbbSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
831eb62cbbSBarry Smith         }
841eb62cbbSBarry Smith       }
851eb62cbbSBarry Smith     }
861eb62cbbSBarry Smith     else {
871eb62cbbSBarry Smith       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
881eb62cbbSBarry Smith     }
898a729477SBarry Smith   }
908a729477SBarry Smith   return 0;
918a729477SBarry Smith }
928a729477SBarry Smith 
938a729477SBarry Smith /*
941eb62cbbSBarry Smith     the assembly code is alot like the code for vectors, we should
951eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
961eb62cbbSBarry Smith     either case.
978a729477SBarry Smith */
988a729477SBarry Smith 
991eb62cbbSBarry Smith static int MatiAIJBeginAssemble(Mat mat)
1008a729477SBarry Smith {
1011eb62cbbSBarry Smith   Matimpiaij  *aij = (Matimpiaij *) mat->data;
102d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
1031eb62cbbSBarry Smith   int         ierr, numtids = aij->numtids, *owners = aij->rowners;
1041eb62cbbSBarry Smith   int         mytid = aij->mytid;
1051eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1061eb62cbbSBarry Smith   int         *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work;
1071eb62cbbSBarry Smith   int         tag = 50, *owner,*starts,count;
1081eb62cbbSBarry Smith   InsertMode  addv;
1091eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1101eb62cbbSBarry Smith 
1111eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
1121eb62cbbSBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT,
1131eb62cbbSBarry Smith                 MPI_BOR,comm);
1141eb62cbbSBarry Smith   if (addv == (AddValues|InsertValues)) {
1151eb62cbbSBarry Smith     SETERR(1,"Some processors have inserted while others have added");
1161eb62cbbSBarry Smith   }
1171eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1181eb62cbbSBarry Smith 
1191eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1201eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
1211eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
1221eb62cbbSBarry Smith   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
1231eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1241eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1251eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1261eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1271eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1288a729477SBarry Smith       }
1298a729477SBarry Smith     }
1308a729477SBarry Smith   }
1311eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1321eb62cbbSBarry Smith 
1331eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1341eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
1351eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1361eb62cbbSBarry Smith   nreceives = work[mytid];
1371eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1381eb62cbbSBarry Smith   nmax = work[mytid];
1391eb62cbbSBarry Smith   FREE(work);
1401eb62cbbSBarry Smith 
1411eb62cbbSBarry Smith   /* post receives:
1421eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1431eb62cbbSBarry Smith      (global index,value) we store the global index as a double
144d6dfbf8fSBarry Smith      to simplify the message passing.
1451eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1461eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1471eb62cbbSBarry Smith      this is a lot of wasted space.
1481eb62cbbSBarry Smith 
1491eb62cbbSBarry Smith 
1501eb62cbbSBarry Smith        This could be done better.
1511eb62cbbSBarry Smith   */
1521eb62cbbSBarry Smith   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar));
1531eb62cbbSBarry Smith   CHKPTR(rvalues);
1541eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
1551eb62cbbSBarry Smith   CHKPTR(recv_waits);
1561eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
1571eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
1581eb62cbbSBarry Smith               comm,recv_waits+i);
1591eb62cbbSBarry Smith   }
1601eb62cbbSBarry Smith 
1611eb62cbbSBarry Smith   /* do sends:
1621eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1631eb62cbbSBarry Smith          the ith processor
1641eb62cbbSBarry Smith   */
1651eb62cbbSBarry Smith   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
1661eb62cbbSBarry Smith   CHKPTR(svalues);
1671eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
1681eb62cbbSBarry Smith   CHKPTR(send_waits);
1691eb62cbbSBarry Smith   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
1701eb62cbbSBarry Smith   starts[0] = 0;
1711eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1721eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1731eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1741eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1751eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1761eb62cbbSBarry Smith   }
1771eb62cbbSBarry Smith   FREE(owner);
1781eb62cbbSBarry Smith   starts[0] = 0;
1791eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1801eb62cbbSBarry Smith   count = 0;
1811eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1821eb62cbbSBarry Smith     if (procs[i]) {
1831eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
1841eb62cbbSBarry Smith                 comm,send_waits+count++);
1851eb62cbbSBarry Smith     }
1861eb62cbbSBarry Smith   }
1871eb62cbbSBarry Smith   FREE(starts); FREE(nprocs);
1881eb62cbbSBarry Smith 
1891eb62cbbSBarry Smith   /* Free cache space */
1901eb62cbbSBarry Smith   aij->stash.nmax = aij->stash.n = 0;
1911eb62cbbSBarry Smith   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
1921eb62cbbSBarry Smith 
1931eb62cbbSBarry Smith   aij->svalues    = svalues;       aij->rvalues = rvalues;
1941eb62cbbSBarry Smith   aij->nsends     = nsends;         aij->nrecvs = nreceives;
1951eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1961eb62cbbSBarry Smith   aij->rmax       = nmax;
1971eb62cbbSBarry Smith 
1981eb62cbbSBarry Smith   return 0;
1991eb62cbbSBarry Smith }
2001eb62cbbSBarry Smith extern int MPIAIJSetUpMultiply(Mat);
2011eb62cbbSBarry Smith 
2021eb62cbbSBarry Smith static int MatiAIJEndAssemble(Mat mat)
2031eb62cbbSBarry Smith {
2041eb62cbbSBarry Smith   int        ierr;
2051eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
2061eb62cbbSBarry Smith 
2071eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
208d6dfbf8fSBarry Smith   int         imdex,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n;
2091eb62cbbSBarry Smith   int         row,col;
2101eb62cbbSBarry Smith   Scalar      *values,val;
2111eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2121eb62cbbSBarry Smith 
2131eb62cbbSBarry Smith   /*  wait on receives */
2141eb62cbbSBarry Smith   while (count) {
215d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2161eb62cbbSBarry Smith     /* unpack receives into our local space */
217d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
2181eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
2191eb62cbbSBarry Smith     n = n/3;
2201eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2211eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2221eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2231eb62cbbSBarry Smith       val = values[3*i+2];
2241eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2251eb62cbbSBarry Smith           col -= aij->cstart;
2261eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2271eb62cbbSBarry Smith       }
2281eb62cbbSBarry Smith       else {
229d6dfbf8fSBarry Smith         if (aij->assembled) {
230d6dfbf8fSBarry Smith           SETERR(1,"Cannot insert off diagonal block in already\
231d6dfbf8fSBarry Smith                    assembled matrix. Contact petsc-maint@mcs.anl.gov\
232d6dfbf8fSBarry Smith                    if your need this feature");
233d6dfbf8fSBarry Smith         }
2341eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2351eb62cbbSBarry Smith       }
2361eb62cbbSBarry Smith     }
2371eb62cbbSBarry Smith     count--;
2381eb62cbbSBarry Smith   }
2391eb62cbbSBarry Smith   FREE(aij->recv_waits); FREE(aij->rvalues);
2401eb62cbbSBarry Smith 
2411eb62cbbSBarry Smith   /* wait on sends */
2421eb62cbbSBarry Smith   if (aij->nsends) {
2431eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
2441eb62cbbSBarry Smith     CHKPTR(send_status);
2451eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2461eb62cbbSBarry Smith     FREE(send_status);
2471eb62cbbSBarry Smith   }
2481eb62cbbSBarry Smith   FREE(aij->send_waits); FREE(aij->svalues);
2491eb62cbbSBarry Smith 
2501eb62cbbSBarry Smith   aij->insertmode = NotSetValues;
2511eb62cbbSBarry Smith   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
2521eb62cbbSBarry Smith   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
2531eb62cbbSBarry Smith 
254d6dfbf8fSBarry Smith 
255d6dfbf8fSBarry Smith   if (aij->assembled) {
256d6dfbf8fSBarry Smith     /* this is because the present implimentation doesn't support
257d6dfbf8fSBarry Smith      inserting values off the diagonal block.*/
258d6dfbf8fSBarry Smith     return 0;
259d6dfbf8fSBarry Smith   }
260d6dfbf8fSBarry Smith 
2611eb62cbbSBarry Smith   ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
2621eb62cbbSBarry Smith   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
2631eb62cbbSBarry Smith   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
264d6dfbf8fSBarry Smith 
265d6dfbf8fSBarry Smith   aij->assembled = 1;
2668a729477SBarry Smith   return 0;
2678a729477SBarry Smith }
2688a729477SBarry Smith 
2691eb62cbbSBarry Smith static int MatiZero(Mat A)
2701eb62cbbSBarry Smith {
2711eb62cbbSBarry Smith   Matimpiaij *l = (Matimpiaij *) A->data;
2721eb62cbbSBarry Smith 
2731eb62cbbSBarry Smith   MatZeroEntries(l->A); MatZeroEntries(l->B);
2741eb62cbbSBarry Smith   return 0;
2751eb62cbbSBarry Smith }
2761eb62cbbSBarry Smith 
2771eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2781eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2791eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2801eb62cbbSBarry Smith 
2811eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2821eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2831eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2841eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2851eb62cbbSBarry Smith    routine.
2861eb62cbbSBarry Smith */
2871eb62cbbSBarry Smith static int MatiZerorows(Mat A,IS is,Scalar *diag)
2881eb62cbbSBarry Smith {
2891eb62cbbSBarry Smith   Matimpiaij     *l = (Matimpiaij *) A->data;
2901eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2911eb62cbbSBarry Smith   int            *localrows,*procs,*nprocs,j,found,idx,nsends,*work;
2921eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2931eb62cbbSBarry Smith   int            *rvalues,tag = 67,count,base,slen,n,len,*source;
294d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
295d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
2961eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2971eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2981eb62cbbSBarry Smith   IS             istmp;
2991eb62cbbSBarry Smith 
300*da3a660dSBarry Smith   if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
3011eb62cbbSBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
3021eb62cbbSBarry Smith   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
3031eb62cbbSBarry Smith 
3041eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3051eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
3061eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
3071eb62cbbSBarry Smith   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
3081eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3091eb62cbbSBarry Smith     idx = rows[i];
3101eb62cbbSBarry Smith     found = 0;
3111eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
3121eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3131eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3141eb62cbbSBarry Smith       }
3151eb62cbbSBarry Smith     }
316d6dfbf8fSBarry Smith     if (!found) SETERR(1,"Imdex out of range");
3171eb62cbbSBarry Smith   }
3181eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
3191eb62cbbSBarry Smith 
3201eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3211eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
3221eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3231eb62cbbSBarry Smith   nrecvs = work[mytid];
3241eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3251eb62cbbSBarry Smith   nmax = work[mytid];
3261eb62cbbSBarry Smith   FREE(work);
3271eb62cbbSBarry Smith 
3281eb62cbbSBarry Smith   /* post receives:   */
3291eb62cbbSBarry Smith   rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */
3301eb62cbbSBarry Smith   CHKPTR(rvalues);
3311eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
3321eb62cbbSBarry Smith   CHKPTR(recv_waits);
3331eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3341eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3351eb62cbbSBarry Smith               comm,recv_waits+i);
3361eb62cbbSBarry Smith   }
3371eb62cbbSBarry Smith 
3381eb62cbbSBarry Smith   /* do sends:
3391eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3401eb62cbbSBarry Smith          the ith processor
3411eb62cbbSBarry Smith   */
3421eb62cbbSBarry Smith   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
3431eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
3441eb62cbbSBarry Smith   CHKPTR(send_waits);
3451eb62cbbSBarry Smith   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
3461eb62cbbSBarry Smith   starts[0] = 0;
3471eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3481eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3491eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3501eb62cbbSBarry Smith   }
3511eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3521eb62cbbSBarry Smith 
3531eb62cbbSBarry Smith   starts[0] = 0;
3541eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3551eb62cbbSBarry Smith   count = 0;
3561eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3571eb62cbbSBarry Smith     if (procs[i]) {
3581eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3591eb62cbbSBarry Smith                 comm,send_waits+count++);
3601eb62cbbSBarry Smith     }
3611eb62cbbSBarry Smith   }
3621eb62cbbSBarry Smith   FREE(starts);
3631eb62cbbSBarry Smith 
3641eb62cbbSBarry Smith   base = owners[mytid];
3651eb62cbbSBarry Smith 
3661eb62cbbSBarry Smith   /*  wait on receives */
3671eb62cbbSBarry Smith   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
3681eb62cbbSBarry Smith   source = lens + nrecvs;
3691eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3701eb62cbbSBarry Smith   while (count) {
371d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3721eb62cbbSBarry Smith     /* unpack receives into our local space */
3731eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
374d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
375d6dfbf8fSBarry Smith     lens[imdex]  = n;
3761eb62cbbSBarry Smith     slen += n;
3771eb62cbbSBarry Smith     count--;
3781eb62cbbSBarry Smith   }
3791eb62cbbSBarry Smith   FREE(recv_waits);
3801eb62cbbSBarry Smith 
3811eb62cbbSBarry Smith   /* move the data into the send scatter */
3821eb62cbbSBarry Smith   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
3831eb62cbbSBarry Smith   count = 0;
3841eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3851eb62cbbSBarry Smith     values = rvalues + i*nmax;
3861eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3871eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3881eb62cbbSBarry Smith     }
3891eb62cbbSBarry Smith   }
3901eb62cbbSBarry Smith   FREE(rvalues); FREE(lens);
3911eb62cbbSBarry Smith   FREE(owner); FREE(nprocs);
3921eb62cbbSBarry Smith 
3931eb62cbbSBarry Smith   /* actually zap the local rows */
3941eb62cbbSBarry Smith   ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
3951eb62cbbSBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
3961eb62cbbSBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
3971eb62cbbSBarry Smith   ierr = ISDestroy(istmp); CHKERR(ierr);
3981eb62cbbSBarry Smith 
3991eb62cbbSBarry Smith   /* wait on sends */
4001eb62cbbSBarry Smith   if (nsends) {
4011eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
4021eb62cbbSBarry Smith     CHKPTR(send_status);
4031eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4041eb62cbbSBarry Smith     FREE(send_status);
4051eb62cbbSBarry Smith   }
4061eb62cbbSBarry Smith   FREE(send_waits); FREE(svalues);
4071eb62cbbSBarry Smith 
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith   return 0;
4101eb62cbbSBarry Smith }
4111eb62cbbSBarry Smith 
4121eb62cbbSBarry Smith static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
4131eb62cbbSBarry Smith {
4141eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
4151eb62cbbSBarry Smith   int        ierr;
416*da3a660dSBarry Smith   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
417d6dfbf8fSBarry Smith   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
4181eb62cbbSBarry Smith   CHKERR(ierr);
4191eb62cbbSBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
420d6dfbf8fSBarry Smith   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
421d6dfbf8fSBarry Smith   CHKERR(ierr);
4221eb62cbbSBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
4231eb62cbbSBarry Smith   return 0;
4241eb62cbbSBarry Smith }
4251eb62cbbSBarry Smith 
426*da3a660dSBarry Smith static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
427*da3a660dSBarry Smith {
428*da3a660dSBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
429*da3a660dSBarry Smith   int        ierr;
430*da3a660dSBarry Smith   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
431*da3a660dSBarry Smith   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
432*da3a660dSBarry Smith   CHKERR(ierr);
433*da3a660dSBarry Smith   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
434*da3a660dSBarry Smith   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
435*da3a660dSBarry Smith   CHKERR(ierr);
436*da3a660dSBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
437*da3a660dSBarry Smith   return 0;
438*da3a660dSBarry Smith }
439*da3a660dSBarry Smith 
440*da3a660dSBarry Smith static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
441*da3a660dSBarry Smith {
442*da3a660dSBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
443*da3a660dSBarry Smith   int        ierr;
444*da3a660dSBarry Smith 
445*da3a660dSBarry Smith   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
446*da3a660dSBarry Smith   /* do nondiagonal part */
447*da3a660dSBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
448*da3a660dSBarry Smith   /* send it on its way */
449*da3a660dSBarry Smith   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
450*da3a660dSBarry Smith                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
451*da3a660dSBarry Smith   /* do local part */
452*da3a660dSBarry Smith   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
453*da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
454*da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
455*da3a660dSBarry Smith   /* but is not perhaps always true. */
456*da3a660dSBarry Smith   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
457*da3a660dSBarry Smith                          aij->Mvctx); CHKERR(ierr);
458*da3a660dSBarry Smith   return 0;
459*da3a660dSBarry Smith }
460*da3a660dSBarry Smith 
461*da3a660dSBarry Smith static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
462*da3a660dSBarry Smith {
463*da3a660dSBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
464*da3a660dSBarry Smith   int        ierr;
465*da3a660dSBarry Smith 
466*da3a660dSBarry Smith   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
467*da3a660dSBarry Smith   /* do nondiagonal part */
468*da3a660dSBarry Smith   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
469*da3a660dSBarry Smith   /* send it on its way */
470*da3a660dSBarry Smith   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
471*da3a660dSBarry Smith                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
472*da3a660dSBarry Smith   /* do local part */
473*da3a660dSBarry Smith   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
474*da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
475*da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
476*da3a660dSBarry Smith   /* but is not perhaps always true. */
477*da3a660dSBarry Smith   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
478*da3a660dSBarry Smith                          aij->Mvctx); CHKERR(ierr);
479*da3a660dSBarry Smith   return 0;
480*da3a660dSBarry Smith }
481*da3a660dSBarry Smith 
4821eb62cbbSBarry Smith /*
4831eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4841eb62cbbSBarry Smith    diagonal block
4851eb62cbbSBarry Smith */
4861eb62cbbSBarry Smith static int MatiAIJgetdiag(Mat Ain,Vec v)
4871eb62cbbSBarry Smith {
4881eb62cbbSBarry Smith   Matimpiaij *A = (Matimpiaij *) Ain->data;
489*da3a660dSBarry Smith   if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
4901eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4911eb62cbbSBarry Smith }
4921eb62cbbSBarry Smith 
4931eb62cbbSBarry Smith static int MatiAIJdestroy(PetscObject obj)
4941eb62cbbSBarry Smith {
4951eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
4961eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
4971eb62cbbSBarry Smith   int        ierr;
4981eb62cbbSBarry Smith   FREE(aij->rowners);
4991eb62cbbSBarry Smith   ierr = MatDestroy(aij->A); CHKERR(ierr);
5001eb62cbbSBarry Smith   ierr = MatDestroy(aij->B); CHKERR(ierr);
5011eb62cbbSBarry Smith   FREE(aij); FREE(mat);
5021eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
5031eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
5041eb62cbbSBarry Smith   return 0;
5051eb62cbbSBarry Smith }
5061eb62cbbSBarry Smith 
5071eb62cbbSBarry Smith static int MatiView(PetscObject obj,Viewer viewer)
5081eb62cbbSBarry Smith {
5091eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
5101eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
5111eb62cbbSBarry Smith   int        ierr;
5121eb62cbbSBarry Smith 
513*da3a660dSBarry Smith   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
514d6dfbf8fSBarry Smith   MPE_Seq_begin(mat->comm,1);
5151eb62cbbSBarry Smith   printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
5161eb62cbbSBarry Smith           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
5171eb62cbbSBarry Smith           aij->cend);
5181eb62cbbSBarry Smith   ierr = MatView(aij->A,viewer); CHKERR(ierr);
5191eb62cbbSBarry Smith   ierr = MatView(aij->B,viewer); CHKERR(ierr);
520d6dfbf8fSBarry Smith   MPE_Seq_end(mat->comm,1);
5211eb62cbbSBarry Smith   return 0;
5221eb62cbbSBarry Smith }
5231eb62cbbSBarry Smith 
524d6dfbf8fSBarry Smith extern int MatiAIJmarkdiag(Matiaij  *);
5251eb62cbbSBarry Smith /*
5261eb62cbbSBarry Smith     This has to provide several versions.
5271eb62cbbSBarry Smith 
5281eb62cbbSBarry Smith      1) per sequential
5291eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
5301eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
531d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
5321eb62cbbSBarry Smith */
533d6dfbf8fSBarry Smith static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
5348a729477SBarry Smith                         int its,Vec xx)
5358a729477SBarry Smith {
5361eb62cbbSBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
537d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
538d6dfbf8fSBarry Smith   Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
539*da3a660dSBarry Smith   Scalar     zero = 0.0,*b,*x,*w,*xs,*ls,d,*v,sum,scale,*t,*ts;
540d6dfbf8fSBarry Smith   int        ierr,one = 1, tmp, *idx, *diag,mytid;
5418a729477SBarry Smith   int        n = mat->n, m = mat->m, i, j;
542*da3a660dSBarry Smith   Vec        tt;
5438a729477SBarry Smith 
544*da3a660dSBarry Smith   if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");
5451eb62cbbSBarry Smith 
546d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
547d6dfbf8fSBarry Smith   xs = x -1; /* shift by one for index start of 1 */
548*da3a660dSBarry Smith   ls--;
549d6dfbf8fSBarry Smith   if (!A->diag) {if (ierr = MatiAIJmarkdiag(A)) return ierr;}
550d6dfbf8fSBarry Smith   diag = A->diag;
551*da3a660dSBarry Smith   if (flag & SOR_EISENSTAT) {
552*da3a660dSBarry Smith     /* Let  A = L + U + D; where L is lower trianglar,
553*da3a660dSBarry Smith     U is upper triangular, E is diagonal; This routine applies
554*da3a660dSBarry Smith 
555*da3a660dSBarry Smith             (L + E)^{-1} A (U + E)^{-1}
556*da3a660dSBarry Smith 
557*da3a660dSBarry Smith     to a vector efficiently using Eisenstat's trick. This is for
558*da3a660dSBarry Smith     the case of SSOR preconditioner, so E is D/omega where omega
559*da3a660dSBarry Smith     is the relaxation factor.
560*da3a660dSBarry Smith     */
561*da3a660dSBarry Smith     ierr = VecCreate(xx,&tt); CHKERR(ierr);
562*da3a660dSBarry Smith     VecGetArray(tt,&t);
563*da3a660dSBarry Smith     scale = (2.0/omega) - 1.0;
564*da3a660dSBarry Smith     /*  x = (E + U)^{-1} b */
565*da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
566*da3a660dSBarry Smith     ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
567*da3a660dSBarry Smith                               mat->Mvctx); CHKERR(ierr);
568*da3a660dSBarry Smith     for ( i=m-1; i>-1; i-- ) {
569*da3a660dSBarry Smith       n    = A->i[i+1] - diag[i] - 1;
570*da3a660dSBarry Smith       idx  = A->j + diag[i];
571*da3a660dSBarry Smith       v    = A->a + diag[i];
572*da3a660dSBarry Smith       sum  = b[i];
573*da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,xs,v,idx,n);
574*da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
575*da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
576*da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
577*da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
578*da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
579*da3a660dSBarry Smith       x[i] = omega*(sum/d);
580*da3a660dSBarry Smith     }
581*da3a660dSBarry Smith     ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
582*da3a660dSBarry Smith                             mat->Mvctx); CHKERR(ierr);
583*da3a660dSBarry Smith 
584*da3a660dSBarry Smith     /*  t = b - (2*E - D)x */
585*da3a660dSBarry Smith     v = A->a;
586*da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
587*da3a660dSBarry Smith 
588*da3a660dSBarry Smith     /*  t = (E + L)^{-1}t */
589*da3a660dSBarry Smith     ts = t - 1; /* shifted by one for index start of a or mat->j*/
590*da3a660dSBarry Smith     diag = A->diag;
591*da3a660dSBarry Smith     VecSet(&zero,mat->lvec);
592*da3a660dSBarry Smith     ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
593*da3a660dSBarry Smith                                                  mat->Mvctx); CHKERR(ierr);
594*da3a660dSBarry Smith     for ( i=0; i<m; i++ ) {
595*da3a660dSBarry Smith       n    = diag[i] - A->i[i];
596*da3a660dSBarry Smith       idx  = A->j + A->i[i] - 1;
597*da3a660dSBarry Smith       v    = A->a + A->i[i] - 1;
598*da3a660dSBarry Smith       sum  = t[i];
599*da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ts,v,idx,n);
600*da3a660dSBarry Smith       d    = shift + A->a[diag[i]-1];
601*da3a660dSBarry Smith       n    = B->i[i+1] - B->i[i];
602*da3a660dSBarry Smith       idx  = B->j + B->i[i] - 1;
603*da3a660dSBarry Smith       v    = B->a + B->i[i] - 1;
604*da3a660dSBarry Smith       SPARSEDENSEMDOT(sum,ls,v,idx,n);
605*da3a660dSBarry Smith       t[i] = omega*(sum/d);
606*da3a660dSBarry Smith     }
607*da3a660dSBarry Smith     ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
608*da3a660dSBarry Smith                                                     mat->Mvctx); CHKERR(ierr);
609*da3a660dSBarry Smith     /*  x = x + t */
610*da3a660dSBarry Smith     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
611*da3a660dSBarry Smith     VecDestroy(tt);
612*da3a660dSBarry Smith     return 0;
613*da3a660dSBarry Smith   }
614*da3a660dSBarry Smith 
6151eb62cbbSBarry Smith 
616d6dfbf8fSBarry Smith   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
617*da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
618*da3a660dSBarry Smith       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
619*da3a660dSBarry Smith     }
620*da3a660dSBarry Smith     else {
621d6dfbf8fSBarry Smith       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
622d6dfbf8fSBarry Smith       CHKERR(ierr);
623d6dfbf8fSBarry Smith       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
624d6dfbf8fSBarry Smith       CHKERR(ierr);
625*da3a660dSBarry Smith     }
626d6dfbf8fSBarry Smith     while (its--) {
627d6dfbf8fSBarry Smith       /* go down through the rows */
628d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
629d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
630d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
631d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
632d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
633d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
634d6dfbf8fSBarry Smith         sum  = b[i];
635d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
636d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
637d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
638d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
639d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
640d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
641d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
642d6dfbf8fSBarry Smith       }
643d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
644d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
645d6dfbf8fSBarry Smith       /* come up through the rows */
646d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
647d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
648d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
649d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
650d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
651d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
652d6dfbf8fSBarry Smith         sum  = b[i];
653d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
654d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
655d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
656d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
657d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
658d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
659d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
660d6dfbf8fSBarry Smith       }
661d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
662d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
663d6dfbf8fSBarry Smith     }
664d6dfbf8fSBarry Smith   }
665d6dfbf8fSBarry Smith   else if (flag & SOR_FORWARD_SWEEP){
666*da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
667*da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
668*da3a660dSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
669*da3a660dSBarry Smith                               mat->Mvctx); CHKERR(ierr);
670*da3a660dSBarry Smith       for ( i=0; i<m; i++ ) {
671*da3a660dSBarry Smith         n    = diag[i] - A->i[i];
672*da3a660dSBarry Smith         idx  = A->j + A->i[i] - 1;
673*da3a660dSBarry Smith         v    = A->a + A->i[i] - 1;
674*da3a660dSBarry Smith         sum  = b[i];
675*da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
676*da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
677*da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
678*da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
679*da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
680*da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
681*da3a660dSBarry Smith         x[i] = omega*(sum/d);
682*da3a660dSBarry Smith       }
683*da3a660dSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
684*da3a660dSBarry Smith                             mat->Mvctx); CHKERR(ierr);
685*da3a660dSBarry Smith       its--;
686*da3a660dSBarry Smith     }
687d6dfbf8fSBarry Smith     while (its--) {
688d6dfbf8fSBarry Smith       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
689d6dfbf8fSBarry Smith       CHKERR(ierr);
690d6dfbf8fSBarry Smith       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
691d6dfbf8fSBarry Smith       CHKERR(ierr);
692d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
693d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
694d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
695d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
696d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
697d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
698d6dfbf8fSBarry Smith         sum  = b[i];
699d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
700d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
701d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
702d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
703d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
704d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
705d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
706d6dfbf8fSBarry Smith       }
707d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
708d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
709d6dfbf8fSBarry Smith     }
710d6dfbf8fSBarry Smith   }
711d6dfbf8fSBarry Smith   else if (flag & SOR_BACKWARD_SWEEP){
712*da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
713*da3a660dSBarry Smith       VecSet(&zero,mat->lvec);
714*da3a660dSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
715*da3a660dSBarry Smith                               mat->Mvctx); CHKERR(ierr);
716*da3a660dSBarry Smith       for ( i=m-1; i>-1; i-- ) {
717*da3a660dSBarry Smith         n    = A->i[i+1] - diag[i] - 1;
718*da3a660dSBarry Smith         idx  = A->j + diag[i];
719*da3a660dSBarry Smith         v    = A->a + diag[i];
720*da3a660dSBarry Smith         sum  = b[i];
721*da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
722*da3a660dSBarry Smith         d    = shift + A->a[diag[i]-1];
723*da3a660dSBarry Smith         n    = B->i[i+1] - B->i[i];
724*da3a660dSBarry Smith         idx  = B->j + B->i[i] - 1;
725*da3a660dSBarry Smith         v    = B->a + B->i[i] - 1;
726*da3a660dSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
727*da3a660dSBarry Smith         x[i] = omega*(sum/d);
728*da3a660dSBarry Smith       }
729*da3a660dSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
730*da3a660dSBarry Smith                             mat->Mvctx); CHKERR(ierr);
731*da3a660dSBarry Smith       its--;
732*da3a660dSBarry Smith     }
733d6dfbf8fSBarry Smith     while (its--) {
734d6dfbf8fSBarry Smith       ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
735d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
736d6dfbf8fSBarry Smith       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
737d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
738d6dfbf8fSBarry Smith       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
739d6dfbf8fSBarry Smith                               mat->Mvctx); CHKERR(ierr);
740d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
741d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
742d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
743d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
744d6dfbf8fSBarry Smith         sum  = b[i];
745d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
746d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
747d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
748d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
749d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
750d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
751d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
752d6dfbf8fSBarry Smith       }
753d6dfbf8fSBarry Smith       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
754d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
755d6dfbf8fSBarry Smith     }
756d6dfbf8fSBarry Smith   }
757d6dfbf8fSBarry Smith   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
758*da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
759*da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
760*da3a660dSBarry Smith     }
761d6dfbf8fSBarry Smith     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
762d6dfbf8fSBarry Smith     CHKERR(ierr);
763d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
764d6dfbf8fSBarry Smith     CHKERR(ierr);
765d6dfbf8fSBarry Smith     while (its--) {
766d6dfbf8fSBarry Smith       /* go down through the rows */
767d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
768d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
769d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
770d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
771d6dfbf8fSBarry Smith         sum  = b[i];
772d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
773d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
774d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
775d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
776d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
777d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
778d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
779d6dfbf8fSBarry Smith       }
780d6dfbf8fSBarry Smith       /* come up through the rows */
781d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
782d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
783d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
784d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
785d6dfbf8fSBarry Smith         sum  = b[i];
786d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
787d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
788d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
789d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
790d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
791d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
792d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
793d6dfbf8fSBarry Smith       }
794d6dfbf8fSBarry Smith     }
795d6dfbf8fSBarry Smith   }
796d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
797*da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
798*da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
799*da3a660dSBarry Smith     }
800d6dfbf8fSBarry Smith     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
801d6dfbf8fSBarry Smith     CHKERR(ierr);
802d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
803d6dfbf8fSBarry Smith     CHKERR(ierr);
804d6dfbf8fSBarry Smith     while (its--) {
805d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
806d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
807d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
808d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
809d6dfbf8fSBarry Smith         sum  = b[i];
810d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
811d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
812d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
813d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
814d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
815d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
816d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
817d6dfbf8fSBarry Smith       }
818d6dfbf8fSBarry Smith     }
819d6dfbf8fSBarry Smith   }
820d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
821*da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
822*da3a660dSBarry Smith       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
823*da3a660dSBarry Smith     }
824d6dfbf8fSBarry Smith     ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
825d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
826d6dfbf8fSBarry Smith     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
827d6dfbf8fSBarry Smith                             mat->Mvctx); CHKERR(ierr);
828d6dfbf8fSBarry Smith     while (its--) {
829d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
830d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
831d6dfbf8fSBarry Smith         idx  = A->j + A->i[i] - 1;
832d6dfbf8fSBarry Smith         v    = A->a + A->i[i] - 1;
833d6dfbf8fSBarry Smith         sum  = b[i];
834d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835d6dfbf8fSBarry Smith         d    = shift + A->a[diag[i]-1];
836d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
837d6dfbf8fSBarry Smith         idx  = B->j + B->i[i] - 1;
838d6dfbf8fSBarry Smith         v    = B->a + B->i[i] - 1;
839d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840d6dfbf8fSBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
841d6dfbf8fSBarry Smith       }
842d6dfbf8fSBarry Smith     }
843d6dfbf8fSBarry Smith   }
8448a729477SBarry Smith   return 0;
8458a729477SBarry Smith }
846c74985f6SBarry Smith static int MatiAIJinsopt(Mat aijin,int op)
847c74985f6SBarry Smith {
848c74985f6SBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
849c74985f6SBarry Smith 
850c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
851c74985f6SBarry Smith     MatSetOption(aij->A,op);
852c74985f6SBarry Smith     MatSetOption(aij->B,op);
853c74985f6SBarry Smith   }
854c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
855c74985f6SBarry Smith     MatSetOption(aij->A,op);
856c74985f6SBarry Smith     MatSetOption(aij->B,op);
857c74985f6SBarry Smith   }
858c74985f6SBarry Smith   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
859c74985f6SBarry Smith   return 0;
860c74985f6SBarry Smith }
861c74985f6SBarry Smith 
862c74985f6SBarry Smith static int MatiAIJsize(Mat matin,int *m,int *n)
863c74985f6SBarry Smith {
864c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
865c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
866c74985f6SBarry Smith   return 0;
867c74985f6SBarry Smith }
868c74985f6SBarry Smith 
869c74985f6SBarry Smith static int MatiAIJlocalsize(Mat matin,int *m,int *n)
870c74985f6SBarry Smith {
871c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
872c74985f6SBarry Smith   *m = mat->m; *n = mat->n;
873c74985f6SBarry Smith   return 0;
874c74985f6SBarry Smith }
875c74985f6SBarry Smith 
876c74985f6SBarry Smith static int MatiAIJrange(Mat matin,int *m,int *n)
877c74985f6SBarry Smith {
878c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
879c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
880c74985f6SBarry Smith   return 0;
881c74985f6SBarry Smith }
882c74985f6SBarry Smith 
883d6dfbf8fSBarry Smith static int MatiCopy(Mat,Mat *);
884d6dfbf8fSBarry Smith 
8858a729477SBarry Smith /* -------------------------------------------------------------------*/
8861eb62cbbSBarry Smith static struct _MatOps MatOps = {MatiAIJInsertValues,
8878a729477SBarry Smith        0, 0,
888*da3a660dSBarry Smith        MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
8891eb62cbbSBarry Smith        0,0,0,0,
8908a729477SBarry Smith        0,0,
8918a729477SBarry Smith        MatiAIJrelax,
8928a729477SBarry Smith        0,
8931eb62cbbSBarry Smith        0,0,0,
894d6dfbf8fSBarry Smith        MatiCopy,
8958a729477SBarry Smith        MatiAIJgetdiag,0,0,
8961eb62cbbSBarry Smith        MatiAIJBeginAssemble,MatiAIJEndAssemble,
8971eb62cbbSBarry Smith        0,
898c74985f6SBarry Smith        MatiAIJinsopt,MatiZero,MatiZerorows,0,
899c74985f6SBarry Smith        0,0,0,0,
900c74985f6SBarry Smith        MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
9018a729477SBarry Smith 
9028a729477SBarry Smith 
9038a729477SBarry Smith 
9048a729477SBarry Smith /*@
9058a729477SBarry Smith 
9061eb62cbbSBarry Smith       MatCreateMPIAIJ - Creates a sparse parallel matrix
9071eb62cbbSBarry Smith                                  in AIJ format.
9088a729477SBarry Smith 
9098a729477SBarry Smith   Input Parameters:
9101eb62cbbSBarry Smith .   comm - MPI communicator
9111eb62cbbSBarry Smith .   m,n - number of local rows and columns (or -1 to have calculated)
9121eb62cbbSBarry Smith .   M,N - global rows and columns (or -1 to have calculated)
9131eb62cbbSBarry Smith .   d_nz - total number nonzeros in diagonal portion of matrix
9141eb62cbbSBarry Smith .   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
9158a729477SBarry Smith .           You must leave room for the diagonal entry even if it is zero.
9161eb62cbbSBarry Smith .   o_nz - total number nonzeros in off-diagonal portion of matrix
9171eb62cbbSBarry Smith .   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
9181eb62cbbSBarry Smith .           or null. You must have at least one nonzero per row.
9198a729477SBarry Smith 
9208a729477SBarry Smith   Output parameters:
9218a729477SBarry Smith .  newmat - the matrix
9228a729477SBarry Smith 
9231eb62cbbSBarry Smith   Keywords: matrix, aij, compressed row, sparse, parallel
9248a729477SBarry Smith @*/
9251eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
9261eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
9278a729477SBarry Smith {
9288a729477SBarry Smith   Mat          mat;
9291eb62cbbSBarry Smith   Matimpiaij   *aij;
9301eb62cbbSBarry Smith   int          ierr, i,rl,len,sum[2],work[2];
9318a729477SBarry Smith   *newmat         = 0;
9328a729477SBarry Smith   CREATEHEADER(mat,_Mat);
9331eb62cbbSBarry Smith   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
9348a729477SBarry Smith   mat->cookie     = MAT_COOKIE;
9358a729477SBarry Smith   mat->ops        = &MatOps;
9368a729477SBarry Smith   mat->destroy    = MatiAIJdestroy;
9371eb62cbbSBarry Smith   mat->view       = MatiView;
9381eb62cbbSBarry Smith   mat->type       = MATAIJMPI;
9398a729477SBarry Smith   mat->factor     = 0;
9408a729477SBarry Smith   mat->row        = 0;
9418a729477SBarry Smith   mat->col        = 0;
942d6dfbf8fSBarry Smith 
943d6dfbf8fSBarry Smith   mat->comm       = comm;
9441eb62cbbSBarry Smith   aij->insertmode = NotSetValues;
9451eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
9461eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
9471eb62cbbSBarry Smith 
9481eb62cbbSBarry Smith   if (M == -1 || N == -1) {
9491eb62cbbSBarry Smith     work[0] = m; work[1] = n;
950d6dfbf8fSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
9511eb62cbbSBarry Smith     if (M == -1) M = sum[0];
9521eb62cbbSBarry Smith     if (N == -1) N = sum[1];
9531eb62cbbSBarry Smith   }
9541eb62cbbSBarry Smith   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
9551eb62cbbSBarry Smith   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
9568a729477SBarry Smith   aij->m       = m;
9578a729477SBarry Smith   aij->n       = n;
9581eb62cbbSBarry Smith   aij->N       = N;
9591eb62cbbSBarry Smith   aij->M       = M;
9601eb62cbbSBarry Smith 
9611eb62cbbSBarry Smith   /* build local table of row and column ownerships */
9621eb62cbbSBarry Smith   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
9631eb62cbbSBarry Smith   CHKPTR(aij->rowners);
9641eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
9651eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
9661eb62cbbSBarry Smith   aij->rowners[0] = 0;
9671eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
9681eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
9698a729477SBarry Smith   }
9701eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
9711eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
9721eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
9731eb62cbbSBarry Smith   aij->cowners[0] = 0;
9741eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
9751eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
9768a729477SBarry Smith   }
9771eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
9781eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
9798a729477SBarry Smith 
9808a729477SBarry Smith 
9811eb62cbbSBarry Smith   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
9821eb62cbbSBarry Smith   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
9838a729477SBarry Smith 
9841eb62cbbSBarry Smith   /* build cache for off array entries formed */
9851eb62cbbSBarry Smith   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
9861eb62cbbSBarry Smith   aij->stash.n    = 0;
9871eb62cbbSBarry Smith   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
9881eb62cbbSBarry Smith                             sizeof(Scalar))); CHKPTR(aij->stash.array);
9891eb62cbbSBarry Smith   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
9901eb62cbbSBarry Smith   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
9918a729477SBarry Smith 
9921eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
9931eb62cbbSBarry Smith   aij->lvec      = 0;
9941eb62cbbSBarry Smith   aij->Mvctx     = 0;
995d6dfbf8fSBarry Smith   aij->assembled = 0;
9968a729477SBarry Smith 
997d6dfbf8fSBarry Smith   *newmat = mat;
998d6dfbf8fSBarry Smith   return 0;
999d6dfbf8fSBarry Smith }
1000c74985f6SBarry Smith 
1001d6dfbf8fSBarry Smith static int MatiCopy(Mat matin,Mat *newmat)
1002d6dfbf8fSBarry Smith {
1003d6dfbf8fSBarry Smith   Mat        mat;
1004d6dfbf8fSBarry Smith   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
1005d6dfbf8fSBarry Smith   int        i,rl,len, m = oldmat->m, n = oldmat->n,ierr;
1006d6dfbf8fSBarry Smith   *newmat      = 0;
1007d6dfbf8fSBarry Smith 
1008d6dfbf8fSBarry Smith   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1009d6dfbf8fSBarry Smith   CREATEHEADER(mat,_Mat);
1010d6dfbf8fSBarry Smith   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
1011d6dfbf8fSBarry Smith   mat->cookie     = MAT_COOKIE;
1012d6dfbf8fSBarry Smith   mat->ops        = &MatOps;
1013d6dfbf8fSBarry Smith   mat->destroy    = MatiAIJdestroy;
1014d6dfbf8fSBarry Smith   mat->view       = MatiView;
1015d6dfbf8fSBarry Smith   mat->type       = MATAIJSEQ;
1016d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1017d6dfbf8fSBarry Smith   mat->row        = 0;
1018d6dfbf8fSBarry Smith   mat->col        = 0;
1019d6dfbf8fSBarry Smith 
1020d6dfbf8fSBarry Smith   aij->m          = oldmat->m;
1021d6dfbf8fSBarry Smith   aij->n          = oldmat->n;
1022d6dfbf8fSBarry Smith   aij->M          = oldmat->M;
1023d6dfbf8fSBarry Smith   aij->N          = oldmat->N;
1024d6dfbf8fSBarry Smith 
1025d6dfbf8fSBarry Smith   aij->assembled  = 1;
1026d6dfbf8fSBarry Smith   aij->rstart     = oldmat->rstart;
1027d6dfbf8fSBarry Smith   aij->rend       = oldmat->rend;
1028d6dfbf8fSBarry Smith   aij->cstart     = oldmat->cstart;
1029d6dfbf8fSBarry Smith   aij->cend       = oldmat->cend;
1030d6dfbf8fSBarry Smith   aij->numtids    = oldmat->numtids;
1031d6dfbf8fSBarry Smith   aij->mytid      = oldmat->mytid;
1032d6dfbf8fSBarry Smith   aij->insertmode = NotSetValues;
1033d6dfbf8fSBarry Smith 
1034d6dfbf8fSBarry Smith   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1035d6dfbf8fSBarry Smith   CHKPTR(aij->rowners);
1036d6dfbf8fSBarry Smith   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1037d6dfbf8fSBarry Smith   aij->stash.nmax = 0;
1038d6dfbf8fSBarry Smith   aij->stash.n    = 0;
1039d6dfbf8fSBarry Smith   aij->stash.array= 0;
1040d6dfbf8fSBarry Smith   mat->comm       = matin->comm;
1041d6dfbf8fSBarry Smith 
1042d6dfbf8fSBarry Smith   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1043d6dfbf8fSBarry Smith   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1044d6dfbf8fSBarry Smith   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1045d6dfbf8fSBarry Smith   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
10468a729477SBarry Smith   *newmat = mat;
10478a729477SBarry Smith   return 0;
10488a729477SBarry Smith }
1049