xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c74985f65431909a76d9ed326b933d7bd12add40)
18a729477SBarry Smith 
21eb62cbbSBarry Smith #include "mpiaij.h"
38a729477SBarry Smith #include "vec/vecimpl.h"
48a729477SBarry Smith 
58a729477SBarry Smith 
61eb62cbbSBarry Smith #define CHUNCKSIZE   100
71eb62cbbSBarry Smith /*
81eb62cbbSBarry Smith    This is a simple minded stash. Do a linear search to determine if
91eb62cbbSBarry Smith  in stash, if not add to end.
101eb62cbbSBarry Smith */
111eb62cbbSBarry Smith static int StashValues(Stash *stash,int row,int n, int *idxn,
121eb62cbbSBarry Smith                        Scalar *values,InsertMode addv)
138a729477SBarry Smith {
141eb62cbbSBarry Smith   int    i,j,N = stash->n,found,*n_idx, *n_idy;
151eb62cbbSBarry Smith   Scalar val,*n_array;
168a729477SBarry Smith 
171eb62cbbSBarry Smith   for ( i=0; i<n; i++ ) {
181eb62cbbSBarry Smith     found = 0;
191eb62cbbSBarry Smith     val = *values++;
208a729477SBarry Smith     for ( j=0; j<N; j++ ) {
211eb62cbbSBarry Smith       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
221eb62cbbSBarry Smith         /* found a match */
231eb62cbbSBarry Smith         if (addv == AddValues) stash->array[j] += val;
241eb62cbbSBarry Smith         else stash->array[j] = val;
251eb62cbbSBarry Smith         found = 1;
268a729477SBarry Smith         break;
278a729477SBarry Smith       }
288a729477SBarry Smith     }
291eb62cbbSBarry Smith     if (!found) { /* not found so add to end */
301eb62cbbSBarry Smith       if ( stash->n == stash->nmax ) {
311eb62cbbSBarry Smith         /* allocate a larger stash */
321eb62cbbSBarry Smith         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
331eb62cbbSBarry Smith                                      2*sizeof(int) + sizeof(Scalar)));
341eb62cbbSBarry Smith         CHKPTR(n_array);
351eb62cbbSBarry Smith         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
361eb62cbbSBarry Smith         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
371eb62cbbSBarry Smith         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
381eb62cbbSBarry Smith         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
391eb62cbbSBarry Smith         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
401eb62cbbSBarry Smith         if (stash->array) FREE(stash->array);
411eb62cbbSBarry Smith         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
421eb62cbbSBarry Smith         stash->nmax += CHUNCKSIZE;
431eb62cbbSBarry Smith       }
441eb62cbbSBarry Smith       stash->array[stash->n]   = val;
451eb62cbbSBarry Smith       stash->idx[stash->n]     = row;
461eb62cbbSBarry Smith       stash->idy[stash->n++]   = idxn[i];
471eb62cbbSBarry Smith     }
488a729477SBarry Smith   }
498a729477SBarry Smith   return 0;
508a729477SBarry Smith }
518a729477SBarry Smith 
521eb62cbbSBarry Smith static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
531eb62cbbSBarry Smith                             int *idxn,Scalar *v,InsertMode addv)
548a729477SBarry Smith {
551eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
561eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
571eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
588a729477SBarry Smith 
591eb62cbbSBarry Smith   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
601eb62cbbSBarry Smith     SETERR(1,"You cannot mix inserts and adds");
618a729477SBarry Smith   }
621eb62cbbSBarry Smith   aij->insertmode = addv;
638a729477SBarry Smith   for ( i=0; i<m; i++ ) {
64*c74985f6SBarry Smith     if (idxm[i] < 0 || idxm[i] >= aij->M) {
65*c74985f6SBarry Smith       if (aij->outofrange) continue;
66*c74985f6SBarry Smith       else SETERR(1,"row index out of range");
67*c74985f6SBarry Smith     }
681eb62cbbSBarry Smith     if (idxm[i] >= rstart && idxm[i] < rend) {
691eb62cbbSBarry Smith       row = idxm[i] - rstart;
701eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
71*c74985f6SBarry Smith         if (idxn[i] < 0 || idxn[i] >= aij->N) {
72*c74985f6SBarry Smith           if (aij->outofrange) continue;
73*c74985f6SBarry Smith           else SETERR(1,"column index out of range");
74*c74985f6SBarry Smith         }
751eb62cbbSBarry Smith         if (idxn[j] >= cstart && idxn[j] < cend){
761eb62cbbSBarry Smith           col = idxn[j] - cstart;
771eb62cbbSBarry Smith           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
781eb62cbbSBarry Smith         }
791eb62cbbSBarry Smith         else {
801eb62cbbSBarry Smith           col = idxn[j];
811eb62cbbSBarry Smith           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
821eb62cbbSBarry Smith         }
831eb62cbbSBarry Smith       }
841eb62cbbSBarry Smith     }
851eb62cbbSBarry Smith     else {
861eb62cbbSBarry Smith       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
871eb62cbbSBarry Smith     }
888a729477SBarry Smith   }
898a729477SBarry Smith   return 0;
908a729477SBarry Smith }
918a729477SBarry Smith 
928a729477SBarry Smith /*
931eb62cbbSBarry Smith     the assembly code is alot like the code for vectors, we should
941eb62cbbSBarry Smith     sometime derive a single assembly code that can be used for
951eb62cbbSBarry Smith     either case.
968a729477SBarry Smith */
978a729477SBarry Smith 
981eb62cbbSBarry Smith static int MatiAIJBeginAssemble(Mat mat)
998a729477SBarry Smith {
1001eb62cbbSBarry Smith   Matimpiaij  *aij = (Matimpiaij *) mat->data;
1011eb62cbbSBarry Smith   MPI_Comm    comm = aij->comm;
1021eb62cbbSBarry Smith   int         ierr, numtids = aij->numtids, *owners = aij->rowners;
1031eb62cbbSBarry Smith   int         mytid = aij->mytid;
1041eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1051eb62cbbSBarry Smith   int         *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work;
1061eb62cbbSBarry Smith   int         tag = 50, *owner,*starts,count;
1071eb62cbbSBarry Smith   InsertMode  addv;
1081eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1091eb62cbbSBarry Smith 
1101eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
1111eb62cbbSBarry Smith   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT,
1121eb62cbbSBarry Smith                 MPI_BOR,comm);
1131eb62cbbSBarry Smith   if (addv == (AddValues|InsertValues)) {
1141eb62cbbSBarry Smith     SETERR(1,"Some processors have inserted while others have added");
1151eb62cbbSBarry Smith   }
1161eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1171eb62cbbSBarry Smith 
1181eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1191eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
1201eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
1211eb62cbbSBarry Smith   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
1221eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1231eb62cbbSBarry Smith     idx = aij->stash.idx[i];
1241eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
1251eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1261eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1278a729477SBarry Smith       }
1288a729477SBarry Smith     }
1298a729477SBarry Smith   }
1301eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
1311eb62cbbSBarry Smith 
1321eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1331eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
1341eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
1351eb62cbbSBarry Smith   nreceives = work[mytid];
1361eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
1371eb62cbbSBarry Smith   nmax = work[mytid];
1381eb62cbbSBarry Smith   FREE(work);
1391eb62cbbSBarry Smith 
1401eb62cbbSBarry Smith   /* post receives:
1411eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1421eb62cbbSBarry Smith      (global index,value) we store the global index as a double
1431eb62cbbSBarry Smith      to simply the message passing.
1441eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1451eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1461eb62cbbSBarry Smith      this is a lot of wasted space.
1471eb62cbbSBarry Smith 
1481eb62cbbSBarry Smith 
1491eb62cbbSBarry Smith        This could be done better.
1501eb62cbbSBarry Smith   */
1511eb62cbbSBarry Smith   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar));
1521eb62cbbSBarry Smith   CHKPTR(rvalues);
1531eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
1541eb62cbbSBarry Smith   CHKPTR(recv_waits);
1551eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
1561eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
1571eb62cbbSBarry Smith               comm,recv_waits+i);
1581eb62cbbSBarry Smith   }
1591eb62cbbSBarry Smith 
1601eb62cbbSBarry Smith   /* do sends:
1611eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1621eb62cbbSBarry Smith          the ith processor
1631eb62cbbSBarry Smith   */
1641eb62cbbSBarry Smith   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
1651eb62cbbSBarry Smith   CHKPTR(svalues);
1661eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
1671eb62cbbSBarry Smith   CHKPTR(send_waits);
1681eb62cbbSBarry Smith   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
1691eb62cbbSBarry Smith   starts[0] = 0;
1701eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1711eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1721eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
1731eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
1741eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
1751eb62cbbSBarry Smith   }
1761eb62cbbSBarry Smith   FREE(owner);
1771eb62cbbSBarry Smith   starts[0] = 0;
1781eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1791eb62cbbSBarry Smith   count = 0;
1801eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
1811eb62cbbSBarry Smith     if (procs[i]) {
1821eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
1831eb62cbbSBarry Smith                 comm,send_waits+count++);
1841eb62cbbSBarry Smith     }
1851eb62cbbSBarry Smith   }
1861eb62cbbSBarry Smith   FREE(starts); FREE(nprocs);
1871eb62cbbSBarry Smith 
1881eb62cbbSBarry Smith   /* Free cache space */
1891eb62cbbSBarry Smith   aij->stash.nmax = aij->stash.n = 0;
1901eb62cbbSBarry Smith   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
1911eb62cbbSBarry Smith 
1921eb62cbbSBarry Smith   aij->svalues    = svalues;       aij->rvalues = rvalues;
1931eb62cbbSBarry Smith   aij->nsends     = nsends;         aij->nrecvs = nreceives;
1941eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
1951eb62cbbSBarry Smith   aij->rmax       = nmax;
1961eb62cbbSBarry Smith 
1971eb62cbbSBarry Smith   return 0;
1981eb62cbbSBarry Smith }
1991eb62cbbSBarry Smith extern int MPIAIJSetUpMultiply(Mat);
2001eb62cbbSBarry Smith 
2011eb62cbbSBarry Smith static int MatiAIJEndAssemble(Mat mat)
2021eb62cbbSBarry Smith {
2031eb62cbbSBarry Smith   int        ierr;
2041eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
2051eb62cbbSBarry Smith 
2061eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
2071eb62cbbSBarry Smith   int         index,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n;
2081eb62cbbSBarry Smith   int         row,col;
2091eb62cbbSBarry Smith   Scalar      *values,val;
2101eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2111eb62cbbSBarry Smith 
2121eb62cbbSBarry Smith   /*  wait on receives */
2131eb62cbbSBarry Smith   while (count) {
2141eb62cbbSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&index,&recv_status);
2151eb62cbbSBarry Smith     /* unpack receives into our local space */
2161eb62cbbSBarry Smith     values = aij->rvalues + 3*index*aij->rmax;
2171eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
2181eb62cbbSBarry Smith     n = n/3;
2191eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
2201eb62cbbSBarry Smith       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
2211eb62cbbSBarry Smith       col = (int) PETSCREAL(values[3*i+1]);
2221eb62cbbSBarry Smith       val = values[3*i+2];
2231eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2241eb62cbbSBarry Smith           col -= aij->cstart;
2251eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2261eb62cbbSBarry Smith       }
2271eb62cbbSBarry Smith       else {
2281eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2291eb62cbbSBarry Smith       }
2301eb62cbbSBarry Smith     }
2311eb62cbbSBarry Smith     count--;
2321eb62cbbSBarry Smith   }
2331eb62cbbSBarry Smith   FREE(aij->recv_waits); FREE(aij->rvalues);
2341eb62cbbSBarry Smith 
2351eb62cbbSBarry Smith   /* wait on sends */
2361eb62cbbSBarry Smith   if (aij->nsends) {
2371eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
2381eb62cbbSBarry Smith     CHKPTR(send_status);
2391eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2401eb62cbbSBarry Smith     FREE(send_status);
2411eb62cbbSBarry Smith   }
2421eb62cbbSBarry Smith   FREE(aij->send_waits); FREE(aij->svalues);
2431eb62cbbSBarry Smith 
2441eb62cbbSBarry Smith   aij->insertmode = NotSetValues;
2451eb62cbbSBarry Smith   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
2461eb62cbbSBarry Smith   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
2471eb62cbbSBarry Smith 
2481eb62cbbSBarry Smith   ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
2491eb62cbbSBarry Smith   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
2501eb62cbbSBarry Smith   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
2518a729477SBarry Smith   return 0;
2528a729477SBarry Smith }
2538a729477SBarry Smith 
2541eb62cbbSBarry Smith static int MatiZero(Mat A)
2551eb62cbbSBarry Smith {
2561eb62cbbSBarry Smith   Matimpiaij *l = (Matimpiaij *) A->data;
2571eb62cbbSBarry Smith 
2581eb62cbbSBarry Smith   MatZeroEntries(l->A); MatZeroEntries(l->B);
2591eb62cbbSBarry Smith   return 0;
2601eb62cbbSBarry Smith }
2611eb62cbbSBarry Smith 
2621eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and
2631eb62cbbSBarry Smith    scatter create routines, we should try to do it systemamatically
2641eb62cbbSBarry Smith    if we can figure out the proper level of generality. */
2651eb62cbbSBarry Smith 
2661eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
2671eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
2681eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
2691eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
2701eb62cbbSBarry Smith    routine.
2711eb62cbbSBarry Smith */
2721eb62cbbSBarry Smith static int MatiZerorows(Mat A,IS is,Scalar *diag)
2731eb62cbbSBarry Smith {
2741eb62cbbSBarry Smith   Matimpiaij     *l = (Matimpiaij *) A->data;
2751eb62cbbSBarry Smith   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
2761eb62cbbSBarry Smith   int            *localrows,*procs,*nprocs,j,found,idx,nsends,*work;
2771eb62cbbSBarry Smith   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
2781eb62cbbSBarry Smith   int            *rvalues,tag = 67,count,base,slen,n,len,*source;
2791eb62cbbSBarry Smith   int            *lens,index,*lrows,*values;
2801eb62cbbSBarry Smith   MPI_Comm       comm = l->comm;
2811eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
2821eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
2831eb62cbbSBarry Smith   IS             istmp;
2841eb62cbbSBarry Smith 
2851eb62cbbSBarry Smith   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
2861eb62cbbSBarry Smith   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
2871eb62cbbSBarry Smith 
2881eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
2891eb62cbbSBarry Smith   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
2901eb62cbbSBarry Smith   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
2911eb62cbbSBarry Smith   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
2921eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
2931eb62cbbSBarry Smith     idx = rows[i];
2941eb62cbbSBarry Smith     found = 0;
2951eb62cbbSBarry Smith     for ( j=0; j<numtids; j++ ) {
2961eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
2971eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2981eb62cbbSBarry Smith       }
2991eb62cbbSBarry Smith     }
3001eb62cbbSBarry Smith     if (!found) SETERR(1,"Index out of range");
3011eb62cbbSBarry Smith   }
3021eb62cbbSBarry Smith   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
3031eb62cbbSBarry Smith 
3041eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3051eb62cbbSBarry Smith   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
3061eb62cbbSBarry Smith   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
3071eb62cbbSBarry Smith   nrecvs = work[mytid];
3081eb62cbbSBarry Smith   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
3091eb62cbbSBarry Smith   nmax = work[mytid];
3101eb62cbbSBarry Smith   FREE(work);
3111eb62cbbSBarry Smith 
3121eb62cbbSBarry Smith   /* post receives:   */
3131eb62cbbSBarry Smith   rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */
3141eb62cbbSBarry Smith   CHKPTR(rvalues);
3151eb62cbbSBarry Smith   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
3161eb62cbbSBarry Smith   CHKPTR(recv_waits);
3171eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3181eb62cbbSBarry Smith     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
3191eb62cbbSBarry Smith               comm,recv_waits+i);
3201eb62cbbSBarry Smith   }
3211eb62cbbSBarry Smith 
3221eb62cbbSBarry Smith   /* do sends:
3231eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3241eb62cbbSBarry Smith          the ith processor
3251eb62cbbSBarry Smith   */
3261eb62cbbSBarry Smith   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
3271eb62cbbSBarry Smith   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
3281eb62cbbSBarry Smith   CHKPTR(send_waits);
3291eb62cbbSBarry Smith   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
3301eb62cbbSBarry Smith   starts[0] = 0;
3311eb62cbbSBarry Smith   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3321eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3331eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
3341eb62cbbSBarry Smith   }
3351eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
3361eb62cbbSBarry Smith 
3371eb62cbbSBarry Smith   starts[0] = 0;
3381eb62cbbSBarry Smith   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3391eb62cbbSBarry Smith   count = 0;
3401eb62cbbSBarry Smith   for ( i=0; i<numtids; i++ ) {
3411eb62cbbSBarry Smith     if (procs[i]) {
3421eb62cbbSBarry Smith       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
3431eb62cbbSBarry Smith                 comm,send_waits+count++);
3441eb62cbbSBarry Smith     }
3451eb62cbbSBarry Smith   }
3461eb62cbbSBarry Smith   FREE(starts);
3471eb62cbbSBarry Smith 
3481eb62cbbSBarry Smith   base = owners[mytid];
3491eb62cbbSBarry Smith 
3501eb62cbbSBarry Smith   /*  wait on receives */
3511eb62cbbSBarry Smith   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
3521eb62cbbSBarry Smith   source = lens + nrecvs;
3531eb62cbbSBarry Smith   count = nrecvs; slen = 0;
3541eb62cbbSBarry Smith   while (count) {
3551eb62cbbSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&index,&recv_status);
3561eb62cbbSBarry Smith     /* unpack receives into our local space */
3571eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
3581eb62cbbSBarry Smith     source[index]  = recv_status.MPI_SOURCE;
3591eb62cbbSBarry Smith     lens[index]  = n;
3601eb62cbbSBarry Smith     slen += n;
3611eb62cbbSBarry Smith     count--;
3621eb62cbbSBarry Smith   }
3631eb62cbbSBarry Smith   FREE(recv_waits);
3641eb62cbbSBarry Smith 
3651eb62cbbSBarry Smith   /* move the data into the send scatter */
3661eb62cbbSBarry Smith   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
3671eb62cbbSBarry Smith   count = 0;
3681eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
3691eb62cbbSBarry Smith     values = rvalues + i*nmax;
3701eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
3711eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
3721eb62cbbSBarry Smith     }
3731eb62cbbSBarry Smith   }
3741eb62cbbSBarry Smith   FREE(rvalues); FREE(lens);
3751eb62cbbSBarry Smith   FREE(owner); FREE(nprocs);
3761eb62cbbSBarry Smith 
3771eb62cbbSBarry Smith   /* actually zap the local rows */
3781eb62cbbSBarry Smith   ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
3791eb62cbbSBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
3801eb62cbbSBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
3811eb62cbbSBarry Smith   ierr = ISDestroy(istmp); CHKERR(ierr);
3821eb62cbbSBarry Smith 
3831eb62cbbSBarry Smith   /* wait on sends */
3841eb62cbbSBarry Smith   if (nsends) {
3851eb62cbbSBarry Smith     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
3861eb62cbbSBarry Smith     CHKPTR(send_status);
3871eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
3881eb62cbbSBarry Smith     FREE(send_status);
3891eb62cbbSBarry Smith   }
3901eb62cbbSBarry Smith   FREE(send_waits); FREE(svalues);
3911eb62cbbSBarry Smith 
3921eb62cbbSBarry Smith 
3931eb62cbbSBarry Smith   return 0;
3941eb62cbbSBarry Smith }
3951eb62cbbSBarry Smith 
3961eb62cbbSBarry Smith static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
3971eb62cbbSBarry Smith {
3981eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
3991eb62cbbSBarry Smith   int        ierr;
4001eb62cbbSBarry Smith 
4011eb62cbbSBarry Smith   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,&aij->Mvctx);
4021eb62cbbSBarry Smith   CHKERR(ierr);
4031eb62cbbSBarry Smith   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
4041eb62cbbSBarry Smith   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,&aij->Mvctx); CHKERR(ierr);
4051eb62cbbSBarry Smith   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
4061eb62cbbSBarry Smith   return 0;
4071eb62cbbSBarry Smith }
4081eb62cbbSBarry Smith 
4091eb62cbbSBarry Smith /*
4101eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
4111eb62cbbSBarry Smith    diagonal block
4121eb62cbbSBarry Smith */
4131eb62cbbSBarry Smith static int MatiAIJgetdiag(Mat Ain,Vec v)
4141eb62cbbSBarry Smith {
4151eb62cbbSBarry Smith   Matimpiaij *A = (Matimpiaij *) Ain->data;
4161eb62cbbSBarry Smith   return MatGetDiagonal(A->A,v);
4171eb62cbbSBarry Smith }
4181eb62cbbSBarry Smith 
4191eb62cbbSBarry Smith static int MatiAIJdestroy(PetscObject obj)
4201eb62cbbSBarry Smith {
4211eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
4221eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
4231eb62cbbSBarry Smith   int        ierr;
4241eb62cbbSBarry Smith   FREE(aij->rowners);
4251eb62cbbSBarry Smith   ierr = MatDestroy(aij->A); CHKERR(ierr);
4261eb62cbbSBarry Smith   ierr = MatDestroy(aij->B); CHKERR(ierr);
4271eb62cbbSBarry Smith   FREE(aij); FREE(mat);
4281eb62cbbSBarry Smith   if (aij->lvec) VecDestroy(aij->lvec);
4291eb62cbbSBarry Smith   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
4301eb62cbbSBarry Smith   return 0;
4311eb62cbbSBarry Smith }
4321eb62cbbSBarry Smith 
4331eb62cbbSBarry Smith static int MatiView(PetscObject obj,Viewer viewer)
4341eb62cbbSBarry Smith {
4351eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
4361eb62cbbSBarry Smith   Matimpiaij *aij = (Matimpiaij *) mat->data;
4371eb62cbbSBarry Smith   int        ierr;
4381eb62cbbSBarry Smith 
4391eb62cbbSBarry Smith   MPE_Seq_begin(aij->comm,1);
4401eb62cbbSBarry Smith   printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
4411eb62cbbSBarry Smith           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
4421eb62cbbSBarry Smith           aij->cend);
4431eb62cbbSBarry Smith   ierr = MatView(aij->A,viewer); CHKERR(ierr);
4441eb62cbbSBarry Smith   ierr = MatView(aij->B,viewer); CHKERR(ierr);
4451eb62cbbSBarry Smith   MPE_Seq_end(aij->comm,1);
4461eb62cbbSBarry Smith   return 0;
4471eb62cbbSBarry Smith }
4481eb62cbbSBarry Smith 
4491eb62cbbSBarry Smith /*
4501eb62cbbSBarry Smith     This has to provide several versions.
4511eb62cbbSBarry Smith 
4521eb62cbbSBarry Smith      1) per sequential
4531eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
4541eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
4551eb62cbbSBarry Smith      3) color updating out values betwen colors. (this imples an
4561eb62cbbSBarry Smith         ordering that is sort of related to the IS argument, it
4571eb62cbbSBarry Smith         is not clear a IS argument makes the most sense perhaps it
4581eb62cbbSBarry Smith         should be dropped.
4591eb62cbbSBarry Smith */
4608a729477SBarry Smith static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,IS is,
4618a729477SBarry Smith                         int its,Vec xx)
4628a729477SBarry Smith {
4631eb62cbbSBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
4641eb62cbbSBarry Smith   Scalar     zero = 0.0;
4658a729477SBarry Smith   int        ierr,one = 1, tmp, *idx, *diag;
4668a729477SBarry Smith   int        n = mat->n, m = mat->m, i, j;
4678a729477SBarry Smith 
4688a729477SBarry Smith   if (is) SETERR(1,"No support for ordering in relaxation");
4698a729477SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
4708a729477SBarry Smith     if (ierr = VecSet(&zero,xx)) return ierr;
4718a729477SBarry Smith   }
4721eb62cbbSBarry Smith 
4731eb62cbbSBarry Smith   /* update outer values from other processors*/
4741eb62cbbSBarry Smith 
4751eb62cbbSBarry Smith   /* smooth locally */
4768a729477SBarry Smith   return 0;
4778a729477SBarry Smith }
478*c74985f6SBarry Smith static int MatiAIJinsopt(Mat aijin,int op)
479*c74985f6SBarry Smith {
480*c74985f6SBarry Smith   Matimpiaij *aij = (Matimpiaij *) aijin->data;
481*c74985f6SBarry Smith 
482*c74985f6SBarry Smith   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
483*c74985f6SBarry Smith     MatSetOption(aij->A,op);
484*c74985f6SBarry Smith     MatSetOption(aij->B,op);
485*c74985f6SBarry Smith   }
486*c74985f6SBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) {
487*c74985f6SBarry Smith     MatSetOption(aij->A,op);
488*c74985f6SBarry Smith     MatSetOption(aij->B,op);
489*c74985f6SBarry Smith   }
490*c74985f6SBarry Smith   else if (op == ALLOW_OUT_OF_RANGE)  aij->outofrange  = 1;
491*c74985f6SBarry Smith   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
492*c74985f6SBarry Smith   return 0;
493*c74985f6SBarry Smith }
494*c74985f6SBarry Smith 
495*c74985f6SBarry Smith static int MatiAIJsize(Mat matin,int *m,int *n)
496*c74985f6SBarry Smith {
497*c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
498*c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
499*c74985f6SBarry Smith   return 0;
500*c74985f6SBarry Smith }
501*c74985f6SBarry Smith 
502*c74985f6SBarry Smith static int MatiAIJlocalsize(Mat matin,int *m,int *n)
503*c74985f6SBarry Smith {
504*c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
505*c74985f6SBarry Smith   *m = mat->m; *n = mat->n;
506*c74985f6SBarry Smith   return 0;
507*c74985f6SBarry Smith }
508*c74985f6SBarry Smith 
509*c74985f6SBarry Smith static int MatiAIJrange(Mat matin,int *m,int *n)
510*c74985f6SBarry Smith {
511*c74985f6SBarry Smith   Matimpiaij *mat = (Matimpiaij *) matin->data;
512*c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
513*c74985f6SBarry Smith   return 0;
514*c74985f6SBarry Smith }
515*c74985f6SBarry Smith 
5168a729477SBarry Smith /* -------------------------------------------------------------------*/
5171eb62cbbSBarry Smith static struct _MatOps MatOps = {MatiAIJInsertValues,
5188a729477SBarry Smith        0, 0,
5191eb62cbbSBarry Smith        MatiAIJMult,0,0,0,
5201eb62cbbSBarry Smith        0,0,0,0,
5218a729477SBarry Smith        0,0,
5228a729477SBarry Smith        MatiAIJrelax,
5238a729477SBarry Smith        0,
5241eb62cbbSBarry Smith        0,0,0,
5258a729477SBarry Smith        0,
5268a729477SBarry Smith        MatiAIJgetdiag,0,0,
5271eb62cbbSBarry Smith        MatiAIJBeginAssemble,MatiAIJEndAssemble,
5281eb62cbbSBarry Smith        0,
529*c74985f6SBarry Smith        MatiAIJinsopt,MatiZero,MatiZerorows,0,
530*c74985f6SBarry Smith        0,0,0,0,
531*c74985f6SBarry Smith        MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
5328a729477SBarry Smith 
5338a729477SBarry Smith 
5348a729477SBarry Smith 
5358a729477SBarry Smith /*@
5368a729477SBarry Smith 
5371eb62cbbSBarry Smith       MatCreateMPIAIJ - Creates a sparse parallel matrix
5381eb62cbbSBarry Smith                                  in AIJ format.
5398a729477SBarry Smith 
5408a729477SBarry Smith   Input Parameters:
5411eb62cbbSBarry Smith .   comm - MPI communicator
5421eb62cbbSBarry Smith .   m,n - number of local rows and columns (or -1 to have calculated)
5431eb62cbbSBarry Smith .   M,N - global rows and columns (or -1 to have calculated)
5441eb62cbbSBarry Smith .   d_nz - total number nonzeros in diagonal portion of matrix
5451eb62cbbSBarry Smith .   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
5468a729477SBarry Smith .           You must leave room for the diagonal entry even if it is zero.
5471eb62cbbSBarry Smith .   o_nz - total number nonzeros in off-diagonal portion of matrix
5481eb62cbbSBarry Smith .   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
5491eb62cbbSBarry Smith .           or null. You must have at least one nonzero per row.
5508a729477SBarry Smith 
5518a729477SBarry Smith   Output parameters:
5528a729477SBarry Smith .  newmat - the matrix
5538a729477SBarry Smith 
5541eb62cbbSBarry Smith   Keywords: matrix, aij, compressed row, sparse, parallel
5558a729477SBarry Smith @*/
5561eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
5571eb62cbbSBarry Smith                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
5588a729477SBarry Smith {
5598a729477SBarry Smith   Mat          mat;
5601eb62cbbSBarry Smith   Matimpiaij   *aij;
5611eb62cbbSBarry Smith   int          ierr, i,rl,len,sum[2],work[2];
5628a729477SBarry Smith   *newmat         = 0;
5638a729477SBarry Smith   CREATEHEADER(mat,_Mat);
5641eb62cbbSBarry Smith   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
5658a729477SBarry Smith   mat->cookie     = MAT_COOKIE;
5668a729477SBarry Smith   mat->ops        = &MatOps;
5678a729477SBarry Smith   mat->destroy    = MatiAIJdestroy;
5681eb62cbbSBarry Smith   mat->view       = MatiView;
5691eb62cbbSBarry Smith   mat->type       = MATAIJMPI;
5708a729477SBarry Smith   mat->factor     = 0;
5718a729477SBarry Smith   mat->row        = 0;
5728a729477SBarry Smith   mat->col        = 0;
5731eb62cbbSBarry Smith   aij->comm       = comm;
5741eb62cbbSBarry Smith   aij->insertmode = NotSetValues;
5751eb62cbbSBarry Smith   MPI_Comm_rank(comm,&aij->mytid);
5761eb62cbbSBarry Smith   MPI_Comm_size(comm,&aij->numtids);
5771eb62cbbSBarry Smith 
5781eb62cbbSBarry Smith   if (M == -1 || N == -1) {
5791eb62cbbSBarry Smith     work[0] = m; work[1] = n;
5801eb62cbbSBarry Smith     MPI_Allreduce((void *) work,(void *) sum,1,MPI_INT,MPI_SUM,comm );
5811eb62cbbSBarry Smith     if (M == -1) M = sum[0];
5821eb62cbbSBarry Smith     if (N == -1) N = sum[1];
5831eb62cbbSBarry Smith   }
5841eb62cbbSBarry Smith   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
5851eb62cbbSBarry Smith   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
5868a729477SBarry Smith   aij->m       = m;
5878a729477SBarry Smith   aij->n       = n;
5881eb62cbbSBarry Smith   aij->N       = N;
5891eb62cbbSBarry Smith   aij->M       = M;
5901eb62cbbSBarry Smith 
5911eb62cbbSBarry Smith   /* build local table of row and column ownerships */
5921eb62cbbSBarry Smith   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
5931eb62cbbSBarry Smith   CHKPTR(aij->rowners);
5941eb62cbbSBarry Smith   aij->cowners = aij->rowners + aij->numtids + 1;
5951eb62cbbSBarry Smith   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
5961eb62cbbSBarry Smith   aij->rowners[0] = 0;
5971eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
5981eb62cbbSBarry Smith     aij->rowners[i] += aij->rowners[i-1];
5998a729477SBarry Smith   }
6001eb62cbbSBarry Smith   aij->rstart = aij->rowners[aij->mytid];
6011eb62cbbSBarry Smith   aij->rend   = aij->rowners[aij->mytid+1];
6021eb62cbbSBarry Smith   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
6031eb62cbbSBarry Smith   aij->cowners[0] = 0;
6041eb62cbbSBarry Smith   for ( i=2; i<=aij->numtids; i++ ) {
6051eb62cbbSBarry Smith     aij->cowners[i] += aij->cowners[i-1];
6068a729477SBarry Smith   }
6071eb62cbbSBarry Smith   aij->cstart = aij->cowners[aij->mytid];
6081eb62cbbSBarry Smith   aij->cend   = aij->cowners[aij->mytid+1];
6098a729477SBarry Smith 
6108a729477SBarry Smith 
6111eb62cbbSBarry Smith   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
6121eb62cbbSBarry Smith   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
6138a729477SBarry Smith 
6141eb62cbbSBarry Smith   /* build cache for off array entries formed */
6151eb62cbbSBarry Smith   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
6161eb62cbbSBarry Smith   aij->stash.n    = 0;
6171eb62cbbSBarry Smith   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
6181eb62cbbSBarry Smith                             sizeof(Scalar))); CHKPTR(aij->stash.array);
6191eb62cbbSBarry Smith   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
6201eb62cbbSBarry Smith   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
6218a729477SBarry Smith 
6221eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
6231eb62cbbSBarry Smith   aij->lvec      = 0;
6241eb62cbbSBarry Smith   aij->Mvctx     = 0;
6258a729477SBarry Smith 
626*c74985f6SBarry Smith   aij->outofrange = 0;
627*c74985f6SBarry Smith 
6288a729477SBarry Smith   *newmat = mat;
6298a729477SBarry Smith   return 0;
6308a729477SBarry Smith }
631