18a729477SBarry Smith 21eb62cbbSBarry Smith #include "mpiaij.h" 38a729477SBarry Smith #include "vec/vecimpl.h" 48a729477SBarry Smith 58a729477SBarry Smith 61eb62cbbSBarry Smith #define CHUNCKSIZE 100 71eb62cbbSBarry Smith /* 81eb62cbbSBarry Smith This is a simple minded stash. Do a linear search to determine if 91eb62cbbSBarry Smith in stash, if not add to end. 101eb62cbbSBarry Smith */ 111eb62cbbSBarry Smith static int StashValues(Stash *stash,int row,int n, int *idxn, 121eb62cbbSBarry Smith Scalar *values,InsertMode addv) 138a729477SBarry Smith { 141eb62cbbSBarry Smith int i,j,N = stash->n,found,*n_idx, *n_idy; 151eb62cbbSBarry Smith Scalar val,*n_array; 168a729477SBarry Smith 171eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 181eb62cbbSBarry Smith found = 0; 191eb62cbbSBarry Smith val = *values++; 208a729477SBarry Smith for ( j=0; j<N; j++ ) { 211eb62cbbSBarry Smith if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 221eb62cbbSBarry Smith /* found a match */ 231eb62cbbSBarry Smith if (addv == AddValues) stash->array[j] += val; 241eb62cbbSBarry Smith else stash->array[j] = val; 251eb62cbbSBarry Smith found = 1; 268a729477SBarry Smith break; 278a729477SBarry Smith } 288a729477SBarry Smith } 291eb62cbbSBarry Smith if (!found) { /* not found so add to end */ 301eb62cbbSBarry Smith if ( stash->n == stash->nmax ) { 311eb62cbbSBarry Smith /* allocate a larger stash */ 321eb62cbbSBarry Smith n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 331eb62cbbSBarry Smith 2*sizeof(int) + sizeof(Scalar))); 341eb62cbbSBarry Smith CHKPTR(n_array); 351eb62cbbSBarry Smith n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 361eb62cbbSBarry Smith n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 371eb62cbbSBarry Smith MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 381eb62cbbSBarry Smith MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 391eb62cbbSBarry Smith MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 401eb62cbbSBarry Smith if (stash->array) FREE(stash->array); 411eb62cbbSBarry Smith stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 421eb62cbbSBarry Smith stash->nmax += CHUNCKSIZE; 431eb62cbbSBarry Smith } 441eb62cbbSBarry Smith stash->array[stash->n] = val; 451eb62cbbSBarry Smith stash->idx[stash->n] = row; 461eb62cbbSBarry Smith stash->idy[stash->n++] = idxn[i]; 471eb62cbbSBarry Smith } 488a729477SBarry Smith } 498a729477SBarry Smith return 0; 508a729477SBarry Smith } 518a729477SBarry Smith 521eb62cbbSBarry Smith static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 531eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 548a729477SBarry Smith { 551eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 561eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 571eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 588a729477SBarry Smith 591eb62cbbSBarry Smith if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 601eb62cbbSBarry Smith SETERR(1,"You cannot mix inserts and adds"); 618a729477SBarry Smith } 621eb62cbbSBarry Smith aij->insertmode = addv; 638a729477SBarry Smith for ( i=0; i<m; i++ ) { 64*c74985f6SBarry Smith if (idxm[i] < 0 || idxm[i] >= aij->M) { 65*c74985f6SBarry Smith if (aij->outofrange) continue; 66*c74985f6SBarry Smith else SETERR(1,"row index out of range"); 67*c74985f6SBarry Smith } 681eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 691eb62cbbSBarry Smith row = idxm[i] - rstart; 701eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 71*c74985f6SBarry Smith if (idxn[i] < 0 || idxn[i] >= aij->N) { 72*c74985f6SBarry Smith if (aij->outofrange) continue; 73*c74985f6SBarry Smith else SETERR(1,"column index out of range"); 74*c74985f6SBarry Smith } 751eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 761eb62cbbSBarry Smith col = idxn[j] - cstart; 771eb62cbbSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith else { 801eb62cbbSBarry Smith col = idxn[j]; 811eb62cbbSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 821eb62cbbSBarry Smith } 831eb62cbbSBarry Smith } 841eb62cbbSBarry Smith } 851eb62cbbSBarry Smith else { 861eb62cbbSBarry Smith ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 871eb62cbbSBarry Smith } 888a729477SBarry Smith } 898a729477SBarry Smith return 0; 908a729477SBarry Smith } 918a729477SBarry Smith 928a729477SBarry Smith /* 931eb62cbbSBarry Smith the assembly code is alot like the code for vectors, we should 941eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 951eb62cbbSBarry Smith either case. 968a729477SBarry Smith */ 978a729477SBarry Smith 981eb62cbbSBarry Smith static int MatiAIJBeginAssemble(Mat mat) 998a729477SBarry Smith { 1001eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 1011eb62cbbSBarry Smith MPI_Comm comm = aij->comm; 1021eb62cbbSBarry Smith int ierr, numtids = aij->numtids, *owners = aij->rowners; 1031eb62cbbSBarry Smith int mytid = aij->mytid; 1041eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1051eb62cbbSBarry Smith int *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work; 1061eb62cbbSBarry Smith int tag = 50, *owner,*starts,count; 1071eb62cbbSBarry Smith InsertMode addv; 1081eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1091eb62cbbSBarry Smith 1101eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 1111eb62cbbSBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT, 1121eb62cbbSBarry Smith MPI_BOR,comm); 1131eb62cbbSBarry Smith if (addv == (AddValues|InsertValues)) { 1141eb62cbbSBarry Smith SETERR(1,"Some processors have inserted while others have added"); 1151eb62cbbSBarry Smith } 1161eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1171eb62cbbSBarry Smith 1181eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1191eb62cbbSBarry Smith nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 1201eb62cbbSBarry Smith MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 1211eb62cbbSBarry Smith owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 1221eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1231eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1241eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1251eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1261eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1278a729477SBarry Smith } 1288a729477SBarry Smith } 1298a729477SBarry Smith } 1301eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1311eb62cbbSBarry Smith 1321eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1331eb62cbbSBarry Smith work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 1341eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1351eb62cbbSBarry Smith nreceives = work[mytid]; 1361eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1371eb62cbbSBarry Smith nmax = work[mytid]; 1381eb62cbbSBarry Smith FREE(work); 1391eb62cbbSBarry Smith 1401eb62cbbSBarry Smith /* post receives: 1411eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1421eb62cbbSBarry Smith (global index,value) we store the global index as a double 1431eb62cbbSBarry Smith to simply the message passing. 1441eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1451eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1461eb62cbbSBarry Smith this is a lot of wasted space. 1471eb62cbbSBarry Smith 1481eb62cbbSBarry Smith 1491eb62cbbSBarry Smith This could be done better. 1501eb62cbbSBarry Smith */ 1511eb62cbbSBarry Smith rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar)); 1521eb62cbbSBarry Smith CHKPTR(rvalues); 1531eb62cbbSBarry Smith recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 1541eb62cbbSBarry Smith CHKPTR(recv_waits); 1551eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 1561eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 1571eb62cbbSBarry Smith comm,recv_waits+i); 1581eb62cbbSBarry Smith } 1591eb62cbbSBarry Smith 1601eb62cbbSBarry Smith /* do sends: 1611eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1621eb62cbbSBarry Smith the ith processor 1631eb62cbbSBarry Smith */ 1641eb62cbbSBarry Smith svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 1651eb62cbbSBarry Smith CHKPTR(svalues); 1661eb62cbbSBarry Smith send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 1671eb62cbbSBarry Smith CHKPTR(send_waits); 1681eb62cbbSBarry Smith starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 1691eb62cbbSBarry Smith starts[0] = 0; 1701eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1711eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1721eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1731eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1741eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1751eb62cbbSBarry Smith } 1761eb62cbbSBarry Smith FREE(owner); 1771eb62cbbSBarry Smith starts[0] = 0; 1781eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1791eb62cbbSBarry Smith count = 0; 1801eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1811eb62cbbSBarry Smith if (procs[i]) { 1821eb62cbbSBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 1831eb62cbbSBarry Smith comm,send_waits+count++); 1841eb62cbbSBarry Smith } 1851eb62cbbSBarry Smith } 1861eb62cbbSBarry Smith FREE(starts); FREE(nprocs); 1871eb62cbbSBarry Smith 1881eb62cbbSBarry Smith /* Free cache space */ 1891eb62cbbSBarry Smith aij->stash.nmax = aij->stash.n = 0; 1901eb62cbbSBarry Smith if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 1911eb62cbbSBarry Smith 1921eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1931eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1941eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1951eb62cbbSBarry Smith aij->rmax = nmax; 1961eb62cbbSBarry Smith 1971eb62cbbSBarry Smith return 0; 1981eb62cbbSBarry Smith } 1991eb62cbbSBarry Smith extern int MPIAIJSetUpMultiply(Mat); 2001eb62cbbSBarry Smith 2011eb62cbbSBarry Smith static int MatiAIJEndAssemble(Mat mat) 2021eb62cbbSBarry Smith { 2031eb62cbbSBarry Smith int ierr; 2041eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 2051eb62cbbSBarry Smith 2061eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 2071eb62cbbSBarry Smith int index,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n; 2081eb62cbbSBarry Smith int row,col; 2091eb62cbbSBarry Smith Scalar *values,val; 2101eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2111eb62cbbSBarry Smith 2121eb62cbbSBarry Smith /* wait on receives */ 2131eb62cbbSBarry Smith while (count) { 2141eb62cbbSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&index,&recv_status); 2151eb62cbbSBarry Smith /* unpack receives into our local space */ 2161eb62cbbSBarry Smith values = aij->rvalues + 3*index*aij->rmax; 2171eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_SCALAR,&n); 2181eb62cbbSBarry Smith n = n/3; 2191eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2201eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2211eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2221eb62cbbSBarry Smith val = values[3*i+2]; 2231eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2241eb62cbbSBarry Smith col -= aij->cstart; 2251eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2261eb62cbbSBarry Smith } 2271eb62cbbSBarry Smith else { 2281eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2291eb62cbbSBarry Smith } 2301eb62cbbSBarry Smith } 2311eb62cbbSBarry Smith count--; 2321eb62cbbSBarry Smith } 2331eb62cbbSBarry Smith FREE(aij->recv_waits); FREE(aij->rvalues); 2341eb62cbbSBarry Smith 2351eb62cbbSBarry Smith /* wait on sends */ 2361eb62cbbSBarry Smith if (aij->nsends) { 2371eb62cbbSBarry Smith send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 2381eb62cbbSBarry Smith CHKPTR(send_status); 2391eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2401eb62cbbSBarry Smith FREE(send_status); 2411eb62cbbSBarry Smith } 2421eb62cbbSBarry Smith FREE(aij->send_waits); FREE(aij->svalues); 2431eb62cbbSBarry Smith 2441eb62cbbSBarry Smith aij->insertmode = NotSetValues; 2451eb62cbbSBarry Smith ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 2461eb62cbbSBarry Smith ierr = MatEndAssembly(aij->A); CHKERR(ierr); 2471eb62cbbSBarry Smith 2481eb62cbbSBarry Smith ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 2491eb62cbbSBarry Smith ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 2501eb62cbbSBarry Smith ierr = MatEndAssembly(aij->B); CHKERR(ierr); 2518a729477SBarry Smith return 0; 2528a729477SBarry Smith } 2538a729477SBarry Smith 2541eb62cbbSBarry Smith static int MatiZero(Mat A) 2551eb62cbbSBarry Smith { 2561eb62cbbSBarry Smith Matimpiaij *l = (Matimpiaij *) A->data; 2571eb62cbbSBarry Smith 2581eb62cbbSBarry Smith MatZeroEntries(l->A); MatZeroEntries(l->B); 2591eb62cbbSBarry Smith return 0; 2601eb62cbbSBarry Smith } 2611eb62cbbSBarry Smith 2621eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2631eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2641eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2651eb62cbbSBarry Smith 2661eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2671eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2681eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2691eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2701eb62cbbSBarry Smith routine. 2711eb62cbbSBarry Smith */ 2721eb62cbbSBarry Smith static int MatiZerorows(Mat A,IS is,Scalar *diag) 2731eb62cbbSBarry Smith { 2741eb62cbbSBarry Smith Matimpiaij *l = (Matimpiaij *) A->data; 2751eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2761eb62cbbSBarry Smith int *localrows,*procs,*nprocs,j,found,idx,nsends,*work; 2771eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2781eb62cbbSBarry Smith int *rvalues,tag = 67,count,base,slen,n,len,*source; 2791eb62cbbSBarry Smith int *lens,index,*lrows,*values; 2801eb62cbbSBarry Smith MPI_Comm comm = l->comm; 2811eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2821eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2831eb62cbbSBarry Smith IS istmp; 2841eb62cbbSBarry Smith 2851eb62cbbSBarry Smith ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 2861eb62cbbSBarry Smith ierr = ISGetIndices(is,&rows); CHKERR(ierr); 2871eb62cbbSBarry Smith 2881eb62cbbSBarry Smith /* first count number of contributors to each processor */ 2891eb62cbbSBarry Smith nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 2901eb62cbbSBarry Smith MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 2911eb62cbbSBarry Smith owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 2921eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 2931eb62cbbSBarry Smith idx = rows[i]; 2941eb62cbbSBarry Smith found = 0; 2951eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 2961eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 2971eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2981eb62cbbSBarry Smith } 2991eb62cbbSBarry Smith } 3001eb62cbbSBarry Smith if (!found) SETERR(1,"Index out of range"); 3011eb62cbbSBarry Smith } 3021eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3031eb62cbbSBarry Smith 3041eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3051eb62cbbSBarry Smith work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 3061eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3071eb62cbbSBarry Smith nrecvs = work[mytid]; 3081eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3091eb62cbbSBarry Smith nmax = work[mytid]; 3101eb62cbbSBarry Smith FREE(work); 3111eb62cbbSBarry Smith 3121eb62cbbSBarry Smith /* post receives: */ 3131eb62cbbSBarry Smith rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */ 3141eb62cbbSBarry Smith CHKPTR(rvalues); 3151eb62cbbSBarry Smith recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 3161eb62cbbSBarry Smith CHKPTR(recv_waits); 3171eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3181eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3191eb62cbbSBarry Smith comm,recv_waits+i); 3201eb62cbbSBarry Smith } 3211eb62cbbSBarry Smith 3221eb62cbbSBarry Smith /* do sends: 3231eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3241eb62cbbSBarry Smith the ith processor 3251eb62cbbSBarry Smith */ 3261eb62cbbSBarry Smith svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 3271eb62cbbSBarry Smith send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 3281eb62cbbSBarry Smith CHKPTR(send_waits); 3291eb62cbbSBarry Smith starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 3301eb62cbbSBarry Smith starts[0] = 0; 3311eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3321eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3331eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3341eb62cbbSBarry Smith } 3351eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith starts[0] = 0; 3381eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3391eb62cbbSBarry Smith count = 0; 3401eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3411eb62cbbSBarry Smith if (procs[i]) { 3421eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3431eb62cbbSBarry Smith comm,send_waits+count++); 3441eb62cbbSBarry Smith } 3451eb62cbbSBarry Smith } 3461eb62cbbSBarry Smith FREE(starts); 3471eb62cbbSBarry Smith 3481eb62cbbSBarry Smith base = owners[mytid]; 3491eb62cbbSBarry Smith 3501eb62cbbSBarry Smith /* wait on receives */ 3511eb62cbbSBarry Smith lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 3521eb62cbbSBarry Smith source = lens + nrecvs; 3531eb62cbbSBarry Smith count = nrecvs; slen = 0; 3541eb62cbbSBarry Smith while (count) { 3551eb62cbbSBarry Smith MPI_Waitany(nrecvs,recv_waits,&index,&recv_status); 3561eb62cbbSBarry Smith /* unpack receives into our local space */ 3571eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 3581eb62cbbSBarry Smith source[index] = recv_status.MPI_SOURCE; 3591eb62cbbSBarry Smith lens[index] = n; 3601eb62cbbSBarry Smith slen += n; 3611eb62cbbSBarry Smith count--; 3621eb62cbbSBarry Smith } 3631eb62cbbSBarry Smith FREE(recv_waits); 3641eb62cbbSBarry Smith 3651eb62cbbSBarry Smith /* move the data into the send scatter */ 3661eb62cbbSBarry Smith lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 3671eb62cbbSBarry Smith count = 0; 3681eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3691eb62cbbSBarry Smith values = rvalues + i*nmax; 3701eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3711eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3721eb62cbbSBarry Smith } 3731eb62cbbSBarry Smith } 3741eb62cbbSBarry Smith FREE(rvalues); FREE(lens); 3751eb62cbbSBarry Smith FREE(owner); FREE(nprocs); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith /* actually zap the local rows */ 3781eb62cbbSBarry Smith ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 3791eb62cbbSBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 3801eb62cbbSBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 3811eb62cbbSBarry Smith ierr = ISDestroy(istmp); CHKERR(ierr); 3821eb62cbbSBarry Smith 3831eb62cbbSBarry Smith /* wait on sends */ 3841eb62cbbSBarry Smith if (nsends) { 3851eb62cbbSBarry Smith send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 3861eb62cbbSBarry Smith CHKPTR(send_status); 3871eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 3881eb62cbbSBarry Smith FREE(send_status); 3891eb62cbbSBarry Smith } 3901eb62cbbSBarry Smith FREE(send_waits); FREE(svalues); 3911eb62cbbSBarry Smith 3921eb62cbbSBarry Smith 3931eb62cbbSBarry Smith return 0; 3941eb62cbbSBarry Smith } 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 3971eb62cbbSBarry Smith { 3981eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 3991eb62cbbSBarry Smith int ierr; 4001eb62cbbSBarry Smith 4011eb62cbbSBarry Smith ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,&aij->Mvctx); 4021eb62cbbSBarry Smith CHKERR(ierr); 4031eb62cbbSBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 4041eb62cbbSBarry Smith ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,&aij->Mvctx); CHKERR(ierr); 4051eb62cbbSBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 4061eb62cbbSBarry Smith return 0; 4071eb62cbbSBarry Smith } 4081eb62cbbSBarry Smith 4091eb62cbbSBarry Smith /* 4101eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4111eb62cbbSBarry Smith diagonal block 4121eb62cbbSBarry Smith */ 4131eb62cbbSBarry Smith static int MatiAIJgetdiag(Mat Ain,Vec v) 4141eb62cbbSBarry Smith { 4151eb62cbbSBarry Smith Matimpiaij *A = (Matimpiaij *) Ain->data; 4161eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4171eb62cbbSBarry Smith } 4181eb62cbbSBarry Smith 4191eb62cbbSBarry Smith static int MatiAIJdestroy(PetscObject obj) 4201eb62cbbSBarry Smith { 4211eb62cbbSBarry Smith Mat mat = (Mat) obj; 4221eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 4231eb62cbbSBarry Smith int ierr; 4241eb62cbbSBarry Smith FREE(aij->rowners); 4251eb62cbbSBarry Smith ierr = MatDestroy(aij->A); CHKERR(ierr); 4261eb62cbbSBarry Smith ierr = MatDestroy(aij->B); CHKERR(ierr); 4271eb62cbbSBarry Smith FREE(aij); FREE(mat); 4281eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 4291eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 4301eb62cbbSBarry Smith return 0; 4311eb62cbbSBarry Smith } 4321eb62cbbSBarry Smith 4331eb62cbbSBarry Smith static int MatiView(PetscObject obj,Viewer viewer) 4341eb62cbbSBarry Smith { 4351eb62cbbSBarry Smith Mat mat = (Mat) obj; 4361eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 4371eb62cbbSBarry Smith int ierr; 4381eb62cbbSBarry Smith 4391eb62cbbSBarry Smith MPE_Seq_begin(aij->comm,1); 4401eb62cbbSBarry Smith printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 4411eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 4421eb62cbbSBarry Smith aij->cend); 4431eb62cbbSBarry Smith ierr = MatView(aij->A,viewer); CHKERR(ierr); 4441eb62cbbSBarry Smith ierr = MatView(aij->B,viewer); CHKERR(ierr); 4451eb62cbbSBarry Smith MPE_Seq_end(aij->comm,1); 4461eb62cbbSBarry Smith return 0; 4471eb62cbbSBarry Smith } 4481eb62cbbSBarry Smith 4491eb62cbbSBarry Smith /* 4501eb62cbbSBarry Smith This has to provide several versions. 4511eb62cbbSBarry Smith 4521eb62cbbSBarry Smith 1) per sequential 4531eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 4541eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 4551eb62cbbSBarry Smith 3) color updating out values betwen colors. (this imples an 4561eb62cbbSBarry Smith ordering that is sort of related to the IS argument, it 4571eb62cbbSBarry Smith is not clear a IS argument makes the most sense perhaps it 4581eb62cbbSBarry Smith should be dropped. 4591eb62cbbSBarry Smith */ 4608a729477SBarry Smith static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,IS is, 4618a729477SBarry Smith int its,Vec xx) 4628a729477SBarry Smith { 4631eb62cbbSBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 4641eb62cbbSBarry Smith Scalar zero = 0.0; 4658a729477SBarry Smith int ierr,one = 1, tmp, *idx, *diag; 4668a729477SBarry Smith int n = mat->n, m = mat->m, i, j; 4678a729477SBarry Smith 4688a729477SBarry Smith if (is) SETERR(1,"No support for ordering in relaxation"); 4698a729477SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 4708a729477SBarry Smith if (ierr = VecSet(&zero,xx)) return ierr; 4718a729477SBarry Smith } 4721eb62cbbSBarry Smith 4731eb62cbbSBarry Smith /* update outer values from other processors*/ 4741eb62cbbSBarry Smith 4751eb62cbbSBarry Smith /* smooth locally */ 4768a729477SBarry Smith return 0; 4778a729477SBarry Smith } 478*c74985f6SBarry Smith static int MatiAIJinsopt(Mat aijin,int op) 479*c74985f6SBarry Smith { 480*c74985f6SBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 481*c74985f6SBarry Smith 482*c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 483*c74985f6SBarry Smith MatSetOption(aij->A,op); 484*c74985f6SBarry Smith MatSetOption(aij->B,op); 485*c74985f6SBarry Smith } 486*c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 487*c74985f6SBarry Smith MatSetOption(aij->A,op); 488*c74985f6SBarry Smith MatSetOption(aij->B,op); 489*c74985f6SBarry Smith } 490*c74985f6SBarry Smith else if (op == ALLOW_OUT_OF_RANGE) aij->outofrange = 1; 491*c74985f6SBarry Smith else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 492*c74985f6SBarry Smith return 0; 493*c74985f6SBarry Smith } 494*c74985f6SBarry Smith 495*c74985f6SBarry Smith static int MatiAIJsize(Mat matin,int *m,int *n) 496*c74985f6SBarry Smith { 497*c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 498*c74985f6SBarry Smith *m = mat->M; *n = mat->N; 499*c74985f6SBarry Smith return 0; 500*c74985f6SBarry Smith } 501*c74985f6SBarry Smith 502*c74985f6SBarry Smith static int MatiAIJlocalsize(Mat matin,int *m,int *n) 503*c74985f6SBarry Smith { 504*c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 505*c74985f6SBarry Smith *m = mat->m; *n = mat->n; 506*c74985f6SBarry Smith return 0; 507*c74985f6SBarry Smith } 508*c74985f6SBarry Smith 509*c74985f6SBarry Smith static int MatiAIJrange(Mat matin,int *m,int *n) 510*c74985f6SBarry Smith { 511*c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 512*c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 513*c74985f6SBarry Smith return 0; 514*c74985f6SBarry Smith } 515*c74985f6SBarry Smith 5168a729477SBarry Smith /* -------------------------------------------------------------------*/ 5171eb62cbbSBarry Smith static struct _MatOps MatOps = {MatiAIJInsertValues, 5188a729477SBarry Smith 0, 0, 5191eb62cbbSBarry Smith MatiAIJMult,0,0,0, 5201eb62cbbSBarry Smith 0,0,0,0, 5218a729477SBarry Smith 0,0, 5228a729477SBarry Smith MatiAIJrelax, 5238a729477SBarry Smith 0, 5241eb62cbbSBarry Smith 0,0,0, 5258a729477SBarry Smith 0, 5268a729477SBarry Smith MatiAIJgetdiag,0,0, 5271eb62cbbSBarry Smith MatiAIJBeginAssemble,MatiAIJEndAssemble, 5281eb62cbbSBarry Smith 0, 529*c74985f6SBarry Smith MatiAIJinsopt,MatiZero,MatiZerorows,0, 530*c74985f6SBarry Smith 0,0,0,0, 531*c74985f6SBarry Smith MatiAIJsize,MatiAIJlocalsize,MatiAIJrange }; 5328a729477SBarry Smith 5338a729477SBarry Smith 5348a729477SBarry Smith 5358a729477SBarry Smith /*@ 5368a729477SBarry Smith 5371eb62cbbSBarry Smith MatCreateMPIAIJ - Creates a sparse parallel matrix 5381eb62cbbSBarry Smith in AIJ format. 5398a729477SBarry Smith 5408a729477SBarry Smith Input Parameters: 5411eb62cbbSBarry Smith . comm - MPI communicator 5421eb62cbbSBarry Smith . m,n - number of local rows and columns (or -1 to have calculated) 5431eb62cbbSBarry Smith . M,N - global rows and columns (or -1 to have calculated) 5441eb62cbbSBarry Smith . d_nz - total number nonzeros in diagonal portion of matrix 5451eb62cbbSBarry Smith . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 5468a729477SBarry Smith . You must leave room for the diagonal entry even if it is zero. 5471eb62cbbSBarry Smith . o_nz - total number nonzeros in off-diagonal portion of matrix 5481eb62cbbSBarry Smith . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 5491eb62cbbSBarry Smith . or null. You must have at least one nonzero per row. 5508a729477SBarry Smith 5518a729477SBarry Smith Output parameters: 5528a729477SBarry Smith . newmat - the matrix 5538a729477SBarry Smith 5541eb62cbbSBarry Smith Keywords: matrix, aij, compressed row, sparse, parallel 5558a729477SBarry Smith @*/ 5561eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 5571eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 5588a729477SBarry Smith { 5598a729477SBarry Smith Mat mat; 5601eb62cbbSBarry Smith Matimpiaij *aij; 5611eb62cbbSBarry Smith int ierr, i,rl,len,sum[2],work[2]; 5628a729477SBarry Smith *newmat = 0; 5638a729477SBarry Smith CREATEHEADER(mat,_Mat); 5641eb62cbbSBarry Smith mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 5658a729477SBarry Smith mat->cookie = MAT_COOKIE; 5668a729477SBarry Smith mat->ops = &MatOps; 5678a729477SBarry Smith mat->destroy = MatiAIJdestroy; 5681eb62cbbSBarry Smith mat->view = MatiView; 5691eb62cbbSBarry Smith mat->type = MATAIJMPI; 5708a729477SBarry Smith mat->factor = 0; 5718a729477SBarry Smith mat->row = 0; 5728a729477SBarry Smith mat->col = 0; 5731eb62cbbSBarry Smith aij->comm = comm; 5741eb62cbbSBarry Smith aij->insertmode = NotSetValues; 5751eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 5761eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 5771eb62cbbSBarry Smith 5781eb62cbbSBarry Smith if (M == -1 || N == -1) { 5791eb62cbbSBarry Smith work[0] = m; work[1] = n; 5801eb62cbbSBarry Smith MPI_Allreduce((void *) work,(void *) sum,1,MPI_INT,MPI_SUM,comm ); 5811eb62cbbSBarry Smith if (M == -1) M = sum[0]; 5821eb62cbbSBarry Smith if (N == -1) N = sum[1]; 5831eb62cbbSBarry Smith } 5841eb62cbbSBarry Smith if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 5851eb62cbbSBarry Smith if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 5868a729477SBarry Smith aij->m = m; 5878a729477SBarry Smith aij->n = n; 5881eb62cbbSBarry Smith aij->N = N; 5891eb62cbbSBarry Smith aij->M = M; 5901eb62cbbSBarry Smith 5911eb62cbbSBarry Smith /* build local table of row and column ownerships */ 5921eb62cbbSBarry Smith aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 5931eb62cbbSBarry Smith CHKPTR(aij->rowners); 5941eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 5951eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 5961eb62cbbSBarry Smith aij->rowners[0] = 0; 5971eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 5981eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 5998a729477SBarry Smith } 6001eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 6011eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 6021eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 6031eb62cbbSBarry Smith aij->cowners[0] = 0; 6041eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 6051eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 6068a729477SBarry Smith } 6071eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 6081eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 6098a729477SBarry Smith 6108a729477SBarry Smith 6111eb62cbbSBarry Smith ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 6121eb62cbbSBarry Smith ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 6138a729477SBarry Smith 6141eb62cbbSBarry Smith /* build cache for off array entries formed */ 6151eb62cbbSBarry Smith aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 6161eb62cbbSBarry Smith aij->stash.n = 0; 6171eb62cbbSBarry Smith aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 6181eb62cbbSBarry Smith sizeof(Scalar))); CHKPTR(aij->stash.array); 6191eb62cbbSBarry Smith aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 6201eb62cbbSBarry Smith aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 6218a729477SBarry Smith 6221eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 6231eb62cbbSBarry Smith aij->lvec = 0; 6241eb62cbbSBarry Smith aij->Mvctx = 0; 6258a729477SBarry Smith 626*c74985f6SBarry Smith aij->outofrange = 0; 627*c74985f6SBarry Smith 6288a729477SBarry Smith *newmat = mat; 6298a729477SBarry Smith return 0; 6308a729477SBarry Smith } 631