18a729477SBarry Smith 21eb62cbbSBarry Smith #include "mpiaij.h" 38a729477SBarry Smith #include "vec/vecimpl.h" 4*d6dfbf8fSBarry Smith #include "inline/spops.h" 58a729477SBarry Smith 61eb62cbbSBarry Smith #define CHUNCKSIZE 100 71eb62cbbSBarry Smith /* 81eb62cbbSBarry Smith This is a simple minded stash. Do a linear search to determine if 91eb62cbbSBarry Smith in stash, if not add to end. 101eb62cbbSBarry Smith */ 111eb62cbbSBarry Smith static int StashValues(Stash *stash,int row,int n, int *idxn, 121eb62cbbSBarry Smith Scalar *values,InsertMode addv) 138a729477SBarry Smith { 141eb62cbbSBarry Smith int i,j,N = stash->n,found,*n_idx, *n_idy; 151eb62cbbSBarry Smith Scalar val,*n_array; 168a729477SBarry Smith 171eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 181eb62cbbSBarry Smith found = 0; 191eb62cbbSBarry Smith val = *values++; 208a729477SBarry Smith for ( j=0; j<N; j++ ) { 211eb62cbbSBarry Smith if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 221eb62cbbSBarry Smith /* found a match */ 231eb62cbbSBarry Smith if (addv == AddValues) stash->array[j] += val; 241eb62cbbSBarry Smith else stash->array[j] = val; 251eb62cbbSBarry Smith found = 1; 268a729477SBarry Smith break; 278a729477SBarry Smith } 288a729477SBarry Smith } 291eb62cbbSBarry Smith if (!found) { /* not found so add to end */ 301eb62cbbSBarry Smith if ( stash->n == stash->nmax ) { 311eb62cbbSBarry Smith /* allocate a larger stash */ 321eb62cbbSBarry Smith n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 331eb62cbbSBarry Smith 2*sizeof(int) + sizeof(Scalar))); 341eb62cbbSBarry Smith CHKPTR(n_array); 351eb62cbbSBarry Smith n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 361eb62cbbSBarry Smith n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 371eb62cbbSBarry Smith MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 381eb62cbbSBarry Smith MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 391eb62cbbSBarry Smith MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 401eb62cbbSBarry Smith if (stash->array) FREE(stash->array); 411eb62cbbSBarry Smith stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 421eb62cbbSBarry Smith stash->nmax += CHUNCKSIZE; 431eb62cbbSBarry Smith } 441eb62cbbSBarry Smith stash->array[stash->n] = val; 451eb62cbbSBarry Smith stash->idx[stash->n] = row; 461eb62cbbSBarry Smith stash->idy[stash->n++] = idxn[i]; 471eb62cbbSBarry Smith } 488a729477SBarry Smith } 498a729477SBarry Smith return 0; 508a729477SBarry Smith } 518a729477SBarry Smith 521eb62cbbSBarry Smith static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 531eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 548a729477SBarry Smith { 551eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 561eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 571eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 588a729477SBarry Smith 591eb62cbbSBarry Smith if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 601eb62cbbSBarry Smith SETERR(1,"You cannot mix inserts and adds"); 618a729477SBarry Smith } 621eb62cbbSBarry Smith aij->insertmode = addv; 638a729477SBarry Smith for ( i=0; i<m; i++ ) { 64*d6dfbf8fSBarry Smith if (idxm[i] < 0) continue; 651eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 661eb62cbbSBarry Smith row = idxm[i] - rstart; 671eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 68*d6dfbf8fSBarry Smith if (idxn[j] < 0) continue; 691eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 701eb62cbbSBarry Smith col = idxn[j] - cstart; 711eb62cbbSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 721eb62cbbSBarry Smith } 731eb62cbbSBarry Smith else { 74*d6dfbf8fSBarry Smith if (aij->assembled) { 75*d6dfbf8fSBarry Smith SETERR(1,"Cannot insert off diagonal block in already\ 76*d6dfbf8fSBarry Smith assembled matrix. Contact petsc-maint@mcs.anl.gov\ 77*d6dfbf8fSBarry Smith if your need this feature"); 78*d6dfbf8fSBarry Smith } 791eb62cbbSBarry Smith col = idxn[j]; 801eb62cbbSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 811eb62cbbSBarry Smith } 821eb62cbbSBarry Smith } 831eb62cbbSBarry Smith } 841eb62cbbSBarry Smith else { 851eb62cbbSBarry Smith ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 861eb62cbbSBarry Smith } 878a729477SBarry Smith } 888a729477SBarry Smith return 0; 898a729477SBarry Smith } 908a729477SBarry Smith 918a729477SBarry Smith /* 921eb62cbbSBarry Smith the assembly code is alot like the code for vectors, we should 931eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 941eb62cbbSBarry Smith either case. 958a729477SBarry Smith */ 968a729477SBarry Smith 971eb62cbbSBarry Smith static int MatiAIJBeginAssemble(Mat mat) 988a729477SBarry Smith { 991eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 100*d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 1011eb62cbbSBarry Smith int ierr, numtids = aij->numtids, *owners = aij->rowners; 1021eb62cbbSBarry Smith int mytid = aij->mytid; 1031eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1041eb62cbbSBarry Smith int *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work; 1051eb62cbbSBarry Smith int tag = 50, *owner,*starts,count; 1061eb62cbbSBarry Smith InsertMode addv; 1071eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1081eb62cbbSBarry Smith 1091eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 1101eb62cbbSBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT, 1111eb62cbbSBarry Smith MPI_BOR,comm); 1121eb62cbbSBarry Smith if (addv == (AddValues|InsertValues)) { 1131eb62cbbSBarry Smith SETERR(1,"Some processors have inserted while others have added"); 1141eb62cbbSBarry Smith } 1151eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1161eb62cbbSBarry Smith 1171eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1181eb62cbbSBarry Smith nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 1191eb62cbbSBarry Smith MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 1201eb62cbbSBarry Smith owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 1211eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1221eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1231eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1241eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1251eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1268a729477SBarry Smith } 1278a729477SBarry Smith } 1288a729477SBarry Smith } 1291eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1301eb62cbbSBarry Smith 1311eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1321eb62cbbSBarry Smith work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 1331eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1341eb62cbbSBarry Smith nreceives = work[mytid]; 1351eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1361eb62cbbSBarry Smith nmax = work[mytid]; 1371eb62cbbSBarry Smith FREE(work); 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith /* post receives: 1401eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1411eb62cbbSBarry Smith (global index,value) we store the global index as a double 142*d6dfbf8fSBarry Smith to simplify the message passing. 1431eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1441eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1451eb62cbbSBarry Smith this is a lot of wasted space. 1461eb62cbbSBarry Smith 1471eb62cbbSBarry Smith 1481eb62cbbSBarry Smith This could be done better. 1491eb62cbbSBarry Smith */ 1501eb62cbbSBarry Smith rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar)); 1511eb62cbbSBarry Smith CHKPTR(rvalues); 1521eb62cbbSBarry Smith recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 1531eb62cbbSBarry Smith CHKPTR(recv_waits); 1541eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 1551eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 1561eb62cbbSBarry Smith comm,recv_waits+i); 1571eb62cbbSBarry Smith } 1581eb62cbbSBarry Smith 1591eb62cbbSBarry Smith /* do sends: 1601eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1611eb62cbbSBarry Smith the ith processor 1621eb62cbbSBarry Smith */ 1631eb62cbbSBarry Smith svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 1641eb62cbbSBarry Smith CHKPTR(svalues); 1651eb62cbbSBarry Smith send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 1661eb62cbbSBarry Smith CHKPTR(send_waits); 1671eb62cbbSBarry Smith starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 1681eb62cbbSBarry Smith starts[0] = 0; 1691eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1701eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1711eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1721eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1731eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1741eb62cbbSBarry Smith } 1751eb62cbbSBarry Smith FREE(owner); 1761eb62cbbSBarry Smith starts[0] = 0; 1771eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1781eb62cbbSBarry Smith count = 0; 1791eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1801eb62cbbSBarry Smith if (procs[i]) { 1811eb62cbbSBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 1821eb62cbbSBarry Smith comm,send_waits+count++); 1831eb62cbbSBarry Smith } 1841eb62cbbSBarry Smith } 1851eb62cbbSBarry Smith FREE(starts); FREE(nprocs); 1861eb62cbbSBarry Smith 1871eb62cbbSBarry Smith /* Free cache space */ 1881eb62cbbSBarry Smith aij->stash.nmax = aij->stash.n = 0; 1891eb62cbbSBarry Smith if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 1901eb62cbbSBarry Smith 1911eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1921eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1931eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1941eb62cbbSBarry Smith aij->rmax = nmax; 1951eb62cbbSBarry Smith 1961eb62cbbSBarry Smith return 0; 1971eb62cbbSBarry Smith } 1981eb62cbbSBarry Smith extern int MPIAIJSetUpMultiply(Mat); 1991eb62cbbSBarry Smith 2001eb62cbbSBarry Smith static int MatiAIJEndAssemble(Mat mat) 2011eb62cbbSBarry Smith { 2021eb62cbbSBarry Smith int ierr; 2031eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 2041eb62cbbSBarry Smith 2051eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 206*d6dfbf8fSBarry Smith int imdex,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n; 2071eb62cbbSBarry Smith int row,col; 2081eb62cbbSBarry Smith Scalar *values,val; 2091eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2101eb62cbbSBarry Smith 2111eb62cbbSBarry Smith /* wait on receives */ 2121eb62cbbSBarry Smith while (count) { 213*d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2141eb62cbbSBarry Smith /* unpack receives into our local space */ 215*d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 2161eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_SCALAR,&n); 2171eb62cbbSBarry Smith n = n/3; 2181eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2191eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2201eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2211eb62cbbSBarry Smith val = values[3*i+2]; 2221eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2231eb62cbbSBarry Smith col -= aij->cstart; 2241eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2251eb62cbbSBarry Smith } 2261eb62cbbSBarry Smith else { 227*d6dfbf8fSBarry Smith if (aij->assembled) { 228*d6dfbf8fSBarry Smith SETERR(1,"Cannot insert off diagonal block in already\ 229*d6dfbf8fSBarry Smith assembled matrix. Contact petsc-maint@mcs.anl.gov\ 230*d6dfbf8fSBarry Smith if your need this feature"); 231*d6dfbf8fSBarry Smith } 2321eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2331eb62cbbSBarry Smith } 2341eb62cbbSBarry Smith } 2351eb62cbbSBarry Smith count--; 2361eb62cbbSBarry Smith } 2371eb62cbbSBarry Smith FREE(aij->recv_waits); FREE(aij->rvalues); 2381eb62cbbSBarry Smith 2391eb62cbbSBarry Smith /* wait on sends */ 2401eb62cbbSBarry Smith if (aij->nsends) { 2411eb62cbbSBarry Smith send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 2421eb62cbbSBarry Smith CHKPTR(send_status); 2431eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2441eb62cbbSBarry Smith FREE(send_status); 2451eb62cbbSBarry Smith } 2461eb62cbbSBarry Smith FREE(aij->send_waits); FREE(aij->svalues); 2471eb62cbbSBarry Smith 2481eb62cbbSBarry Smith aij->insertmode = NotSetValues; 2491eb62cbbSBarry Smith ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 2501eb62cbbSBarry Smith ierr = MatEndAssembly(aij->A); CHKERR(ierr); 2511eb62cbbSBarry Smith 252*d6dfbf8fSBarry Smith 253*d6dfbf8fSBarry Smith if (aij->assembled) { 254*d6dfbf8fSBarry Smith /* this is because the present implimentation doesn't support 255*d6dfbf8fSBarry Smith inserting values off the diagonal block.*/ 256*d6dfbf8fSBarry Smith return 0; 257*d6dfbf8fSBarry Smith } 258*d6dfbf8fSBarry Smith 2591eb62cbbSBarry Smith ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 2601eb62cbbSBarry Smith ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 2611eb62cbbSBarry Smith ierr = MatEndAssembly(aij->B); CHKERR(ierr); 262*d6dfbf8fSBarry Smith 263*d6dfbf8fSBarry Smith aij->assembled = 1; 2648a729477SBarry Smith return 0; 2658a729477SBarry Smith } 2668a729477SBarry Smith 2671eb62cbbSBarry Smith static int MatiZero(Mat A) 2681eb62cbbSBarry Smith { 2691eb62cbbSBarry Smith Matimpiaij *l = (Matimpiaij *) A->data; 2701eb62cbbSBarry Smith 2711eb62cbbSBarry Smith MatZeroEntries(l->A); MatZeroEntries(l->B); 2721eb62cbbSBarry Smith return 0; 2731eb62cbbSBarry Smith } 2741eb62cbbSBarry Smith 2751eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2761eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2771eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2781eb62cbbSBarry Smith 2791eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2801eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2811eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2821eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2831eb62cbbSBarry Smith routine. 2841eb62cbbSBarry Smith */ 2851eb62cbbSBarry Smith static int MatiZerorows(Mat A,IS is,Scalar *diag) 2861eb62cbbSBarry Smith { 2871eb62cbbSBarry Smith Matimpiaij *l = (Matimpiaij *) A->data; 2881eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2891eb62cbbSBarry Smith int *localrows,*procs,*nprocs,j,found,idx,nsends,*work; 2901eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2911eb62cbbSBarry Smith int *rvalues,tag = 67,count,base,slen,n,len,*source; 292*d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 293*d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2941eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2951eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2961eb62cbbSBarry Smith IS istmp; 2971eb62cbbSBarry Smith 2981eb62cbbSBarry Smith ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 2991eb62cbbSBarry Smith ierr = ISGetIndices(is,&rows); CHKERR(ierr); 3001eb62cbbSBarry Smith 3011eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3021eb62cbbSBarry Smith nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 3031eb62cbbSBarry Smith MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 3041eb62cbbSBarry Smith owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 3051eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3061eb62cbbSBarry Smith idx = rows[i]; 3071eb62cbbSBarry Smith found = 0; 3081eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 3091eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3101eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3111eb62cbbSBarry Smith } 3121eb62cbbSBarry Smith } 313*d6dfbf8fSBarry Smith if (!found) SETERR(1,"Imdex out of range"); 3141eb62cbbSBarry Smith } 3151eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3161eb62cbbSBarry Smith 3171eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3181eb62cbbSBarry Smith work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 3191eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3201eb62cbbSBarry Smith nrecvs = work[mytid]; 3211eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3221eb62cbbSBarry Smith nmax = work[mytid]; 3231eb62cbbSBarry Smith FREE(work); 3241eb62cbbSBarry Smith 3251eb62cbbSBarry Smith /* post receives: */ 3261eb62cbbSBarry Smith rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */ 3271eb62cbbSBarry Smith CHKPTR(rvalues); 3281eb62cbbSBarry Smith recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 3291eb62cbbSBarry Smith CHKPTR(recv_waits); 3301eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3311eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3321eb62cbbSBarry Smith comm,recv_waits+i); 3331eb62cbbSBarry Smith } 3341eb62cbbSBarry Smith 3351eb62cbbSBarry Smith /* do sends: 3361eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3371eb62cbbSBarry Smith the ith processor 3381eb62cbbSBarry Smith */ 3391eb62cbbSBarry Smith svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 3401eb62cbbSBarry Smith send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 3411eb62cbbSBarry Smith CHKPTR(send_waits); 3421eb62cbbSBarry Smith starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 3431eb62cbbSBarry Smith starts[0] = 0; 3441eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3451eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3461eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3471eb62cbbSBarry Smith } 3481eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3491eb62cbbSBarry Smith 3501eb62cbbSBarry Smith starts[0] = 0; 3511eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3521eb62cbbSBarry Smith count = 0; 3531eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3541eb62cbbSBarry Smith if (procs[i]) { 3551eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3561eb62cbbSBarry Smith comm,send_waits+count++); 3571eb62cbbSBarry Smith } 3581eb62cbbSBarry Smith } 3591eb62cbbSBarry Smith FREE(starts); 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith base = owners[mytid]; 3621eb62cbbSBarry Smith 3631eb62cbbSBarry Smith /* wait on receives */ 3641eb62cbbSBarry Smith lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 3651eb62cbbSBarry Smith source = lens + nrecvs; 3661eb62cbbSBarry Smith count = nrecvs; slen = 0; 3671eb62cbbSBarry Smith while (count) { 368*d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3691eb62cbbSBarry Smith /* unpack receives into our local space */ 3701eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 371*d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 372*d6dfbf8fSBarry Smith lens[imdex] = n; 3731eb62cbbSBarry Smith slen += n; 3741eb62cbbSBarry Smith count--; 3751eb62cbbSBarry Smith } 3761eb62cbbSBarry Smith FREE(recv_waits); 3771eb62cbbSBarry Smith 3781eb62cbbSBarry Smith /* move the data into the send scatter */ 3791eb62cbbSBarry Smith lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 3801eb62cbbSBarry Smith count = 0; 3811eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3821eb62cbbSBarry Smith values = rvalues + i*nmax; 3831eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3841eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3851eb62cbbSBarry Smith } 3861eb62cbbSBarry Smith } 3871eb62cbbSBarry Smith FREE(rvalues); FREE(lens); 3881eb62cbbSBarry Smith FREE(owner); FREE(nprocs); 3891eb62cbbSBarry Smith 3901eb62cbbSBarry Smith /* actually zap the local rows */ 3911eb62cbbSBarry Smith ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 3921eb62cbbSBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 3931eb62cbbSBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 3941eb62cbbSBarry Smith ierr = ISDestroy(istmp); CHKERR(ierr); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* wait on sends */ 3971eb62cbbSBarry Smith if (nsends) { 3981eb62cbbSBarry Smith send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 3991eb62cbbSBarry Smith CHKPTR(send_status); 4001eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4011eb62cbbSBarry Smith FREE(send_status); 4021eb62cbbSBarry Smith } 4031eb62cbbSBarry Smith FREE(send_waits); FREE(svalues); 4041eb62cbbSBarry Smith 4051eb62cbbSBarry Smith 4061eb62cbbSBarry Smith return 0; 4071eb62cbbSBarry Smith } 4081eb62cbbSBarry Smith 4091eb62cbbSBarry Smith static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 4101eb62cbbSBarry Smith { 4111eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 4121eb62cbbSBarry Smith int ierr; 4131eb62cbbSBarry Smith 414*d6dfbf8fSBarry Smith ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 4151eb62cbbSBarry Smith CHKERR(ierr); 4161eb62cbbSBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 417*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 418*d6dfbf8fSBarry Smith CHKERR(ierr); 4191eb62cbbSBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 4201eb62cbbSBarry Smith return 0; 4211eb62cbbSBarry Smith } 4221eb62cbbSBarry Smith 4231eb62cbbSBarry Smith /* 4241eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4251eb62cbbSBarry Smith diagonal block 4261eb62cbbSBarry Smith */ 4271eb62cbbSBarry Smith static int MatiAIJgetdiag(Mat Ain,Vec v) 4281eb62cbbSBarry Smith { 4291eb62cbbSBarry Smith Matimpiaij *A = (Matimpiaij *) Ain->data; 4301eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4311eb62cbbSBarry Smith } 4321eb62cbbSBarry Smith 4331eb62cbbSBarry Smith static int MatiAIJdestroy(PetscObject obj) 4341eb62cbbSBarry Smith { 4351eb62cbbSBarry Smith Mat mat = (Mat) obj; 4361eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 4371eb62cbbSBarry Smith int ierr; 4381eb62cbbSBarry Smith FREE(aij->rowners); 4391eb62cbbSBarry Smith ierr = MatDestroy(aij->A); CHKERR(ierr); 4401eb62cbbSBarry Smith ierr = MatDestroy(aij->B); CHKERR(ierr); 4411eb62cbbSBarry Smith FREE(aij); FREE(mat); 4421eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 4431eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 4441eb62cbbSBarry Smith return 0; 4451eb62cbbSBarry Smith } 4461eb62cbbSBarry Smith 4471eb62cbbSBarry Smith static int MatiView(PetscObject obj,Viewer viewer) 4481eb62cbbSBarry Smith { 4491eb62cbbSBarry Smith Mat mat = (Mat) obj; 4501eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 4511eb62cbbSBarry Smith int ierr; 4521eb62cbbSBarry Smith 453*d6dfbf8fSBarry Smith MPE_Seq_begin(mat->comm,1); 4541eb62cbbSBarry Smith printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 4551eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 4561eb62cbbSBarry Smith aij->cend); 4571eb62cbbSBarry Smith ierr = MatView(aij->A,viewer); CHKERR(ierr); 4581eb62cbbSBarry Smith ierr = MatView(aij->B,viewer); CHKERR(ierr); 459*d6dfbf8fSBarry Smith MPE_Seq_end(mat->comm,1); 4601eb62cbbSBarry Smith return 0; 4611eb62cbbSBarry Smith } 4621eb62cbbSBarry Smith 463*d6dfbf8fSBarry Smith extern int MatiAIJmarkdiag(Matiaij *); 4641eb62cbbSBarry Smith /* 4651eb62cbbSBarry Smith This has to provide several versions. 4661eb62cbbSBarry Smith 4671eb62cbbSBarry Smith 1) per sequential 4681eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 4691eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 470*d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 4711eb62cbbSBarry Smith */ 472*d6dfbf8fSBarry Smith static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift, 4738a729477SBarry Smith int its,Vec xx) 4748a729477SBarry Smith { 4751eb62cbbSBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 476*d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 477*d6dfbf8fSBarry Smith Matiaij *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data; 478*d6dfbf8fSBarry Smith Scalar zero = 0.0,*b,*x,*w,*xs,*ls,d,*v,sum; 479*d6dfbf8fSBarry Smith int ierr,one = 1, tmp, *idx, *diag,mytid; 4808a729477SBarry Smith int n = mat->n, m = mat->m, i, j; 4818a729477SBarry Smith 4828a729477SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 483*d6dfbf8fSBarry Smith /* could be more efficient by incorporating with below */ 4848a729477SBarry Smith if (ierr = VecSet(&zero,xx)) return ierr; 4858a729477SBarry Smith } 4861eb62cbbSBarry Smith 487*d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 488*d6dfbf8fSBarry Smith xs = x - 1; /* shift by one for index start of 1 */ 489*d6dfbf8fSBarry Smith ls--;; 490*d6dfbf8fSBarry Smith if (!A->diag) {if (ierr = MatiAIJmarkdiag(A)) return ierr;} 491*d6dfbf8fSBarry Smith diag = A->diag; 4921eb62cbbSBarry Smith 493*d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 494*d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 495*d6dfbf8fSBarry Smith CHKERR(ierr); 496*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 497*d6dfbf8fSBarry Smith CHKERR(ierr); 498*d6dfbf8fSBarry Smith while (its--) { 499*d6dfbf8fSBarry Smith /* go down through the rows */ 500*d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 501*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 502*d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 503*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 504*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 505*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 506*d6dfbf8fSBarry Smith sum = b[i]; 507*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 508*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 509*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 510*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 511*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 512*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 513*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 514*d6dfbf8fSBarry Smith } 515*d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 516*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 517*d6dfbf8fSBarry Smith /* come up through the rows */ 518*d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 519*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 520*d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 521*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 522*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 523*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 524*d6dfbf8fSBarry Smith sum = b[i]; 525*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 526*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 527*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 528*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 529*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 530*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 531*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 532*d6dfbf8fSBarry Smith } 533*d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 534*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 535*d6dfbf8fSBarry Smith } 536*d6dfbf8fSBarry Smith } 537*d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 538*d6dfbf8fSBarry Smith while (its--) { 539*d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 540*d6dfbf8fSBarry Smith CHKERR(ierr); 541*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 542*d6dfbf8fSBarry Smith CHKERR(ierr); 543*d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 544*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 545*d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 546*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 547*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 548*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 549*d6dfbf8fSBarry Smith sum = b[i]; 550*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 551*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 552*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 553*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 554*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 555*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 556*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 557*d6dfbf8fSBarry Smith } 558*d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 559*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 560*d6dfbf8fSBarry Smith } 561*d6dfbf8fSBarry Smith } 562*d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 563*d6dfbf8fSBarry Smith while (its--) { 564*d6dfbf8fSBarry Smith ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 565*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 566*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 567*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 568*d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 569*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 570*d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 571*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 572*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 573*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 574*d6dfbf8fSBarry Smith sum = b[i]; 575*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 576*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 577*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 578*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 579*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 580*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 581*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 582*d6dfbf8fSBarry Smith } 583*d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 584*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 585*d6dfbf8fSBarry Smith } 586*d6dfbf8fSBarry Smith } 587*d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 588*d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 589*d6dfbf8fSBarry Smith CHKERR(ierr); 590*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 591*d6dfbf8fSBarry Smith CHKERR(ierr); 592*d6dfbf8fSBarry Smith while (its--) { 593*d6dfbf8fSBarry Smith /* go down through the rows */ 594*d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 595*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 596*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 597*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 598*d6dfbf8fSBarry Smith sum = b[i]; 599*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 600*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 601*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 602*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 603*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 604*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 605*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 606*d6dfbf8fSBarry Smith } 607*d6dfbf8fSBarry Smith /* come up through the rows */ 608*d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 609*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 610*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 611*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 612*d6dfbf8fSBarry Smith sum = b[i]; 613*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 614*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 615*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 616*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 617*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 618*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 619*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 620*d6dfbf8fSBarry Smith } 621*d6dfbf8fSBarry Smith } 622*d6dfbf8fSBarry Smith } 623*d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 624*d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 625*d6dfbf8fSBarry Smith CHKERR(ierr); 626*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 627*d6dfbf8fSBarry Smith CHKERR(ierr); 628*d6dfbf8fSBarry Smith while (its--) { 629*d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 630*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 631*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 632*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 633*d6dfbf8fSBarry Smith sum = b[i]; 634*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 635*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 636*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 637*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 638*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 639*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 640*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 641*d6dfbf8fSBarry Smith } 642*d6dfbf8fSBarry Smith } 643*d6dfbf8fSBarry Smith } 644*d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 645*d6dfbf8fSBarry Smith ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 646*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 647*d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 648*d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 649*d6dfbf8fSBarry Smith while (its--) { 650*d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 651*d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 652*d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 653*d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 654*d6dfbf8fSBarry Smith sum = b[i]; 655*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 656*d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 657*d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 658*d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 659*d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 660*d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 661*d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 662*d6dfbf8fSBarry Smith } 663*d6dfbf8fSBarry Smith } 664*d6dfbf8fSBarry Smith } 6658a729477SBarry Smith return 0; 6668a729477SBarry Smith } 667c74985f6SBarry Smith static int MatiAIJinsopt(Mat aijin,int op) 668c74985f6SBarry Smith { 669c74985f6SBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 670c74985f6SBarry Smith 671c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 672c74985f6SBarry Smith MatSetOption(aij->A,op); 673c74985f6SBarry Smith MatSetOption(aij->B,op); 674c74985f6SBarry Smith } 675c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 676c74985f6SBarry Smith MatSetOption(aij->A,op); 677c74985f6SBarry Smith MatSetOption(aij->B,op); 678c74985f6SBarry Smith } 679c74985f6SBarry Smith else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 680c74985f6SBarry Smith return 0; 681c74985f6SBarry Smith } 682c74985f6SBarry Smith 683c74985f6SBarry Smith static int MatiAIJsize(Mat matin,int *m,int *n) 684c74985f6SBarry Smith { 685c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 686c74985f6SBarry Smith *m = mat->M; *n = mat->N; 687c74985f6SBarry Smith return 0; 688c74985f6SBarry Smith } 689c74985f6SBarry Smith 690c74985f6SBarry Smith static int MatiAIJlocalsize(Mat matin,int *m,int *n) 691c74985f6SBarry Smith { 692c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 693c74985f6SBarry Smith *m = mat->m; *n = mat->n; 694c74985f6SBarry Smith return 0; 695c74985f6SBarry Smith } 696c74985f6SBarry Smith 697c74985f6SBarry Smith static int MatiAIJrange(Mat matin,int *m,int *n) 698c74985f6SBarry Smith { 699c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 700c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 701c74985f6SBarry Smith return 0; 702c74985f6SBarry Smith } 703c74985f6SBarry Smith 704*d6dfbf8fSBarry Smith static int MatiCopy(Mat,Mat *); 705*d6dfbf8fSBarry Smith 7068a729477SBarry Smith /* -------------------------------------------------------------------*/ 7071eb62cbbSBarry Smith static struct _MatOps MatOps = {MatiAIJInsertValues, 7088a729477SBarry Smith 0, 0, 7091eb62cbbSBarry Smith MatiAIJMult,0,0,0, 7101eb62cbbSBarry Smith 0,0,0,0, 7118a729477SBarry Smith 0,0, 7128a729477SBarry Smith MatiAIJrelax, 7138a729477SBarry Smith 0, 7141eb62cbbSBarry Smith 0,0,0, 715*d6dfbf8fSBarry Smith MatiCopy, 7168a729477SBarry Smith MatiAIJgetdiag,0,0, 7171eb62cbbSBarry Smith MatiAIJBeginAssemble,MatiAIJEndAssemble, 7181eb62cbbSBarry Smith 0, 719c74985f6SBarry Smith MatiAIJinsopt,MatiZero,MatiZerorows,0, 720c74985f6SBarry Smith 0,0,0,0, 721c74985f6SBarry Smith MatiAIJsize,MatiAIJlocalsize,MatiAIJrange }; 7228a729477SBarry Smith 7238a729477SBarry Smith 7248a729477SBarry Smith 7258a729477SBarry Smith /*@ 7268a729477SBarry Smith 7271eb62cbbSBarry Smith MatCreateMPIAIJ - Creates a sparse parallel matrix 7281eb62cbbSBarry Smith in AIJ format. 7298a729477SBarry Smith 7308a729477SBarry Smith Input Parameters: 7311eb62cbbSBarry Smith . comm - MPI communicator 7321eb62cbbSBarry Smith . m,n - number of local rows and columns (or -1 to have calculated) 7331eb62cbbSBarry Smith . M,N - global rows and columns (or -1 to have calculated) 7341eb62cbbSBarry Smith . d_nz - total number nonzeros in diagonal portion of matrix 7351eb62cbbSBarry Smith . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 7368a729477SBarry Smith . You must leave room for the diagonal entry even if it is zero. 7371eb62cbbSBarry Smith . o_nz - total number nonzeros in off-diagonal portion of matrix 7381eb62cbbSBarry Smith . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 7391eb62cbbSBarry Smith . or null. You must have at least one nonzero per row. 7408a729477SBarry Smith 7418a729477SBarry Smith Output parameters: 7428a729477SBarry Smith . newmat - the matrix 7438a729477SBarry Smith 7441eb62cbbSBarry Smith Keywords: matrix, aij, compressed row, sparse, parallel 7458a729477SBarry Smith @*/ 7461eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 7471eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 7488a729477SBarry Smith { 7498a729477SBarry Smith Mat mat; 7501eb62cbbSBarry Smith Matimpiaij *aij; 7511eb62cbbSBarry Smith int ierr, i,rl,len,sum[2],work[2]; 7528a729477SBarry Smith *newmat = 0; 7538a729477SBarry Smith CREATEHEADER(mat,_Mat); 7541eb62cbbSBarry Smith mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 7558a729477SBarry Smith mat->cookie = MAT_COOKIE; 7568a729477SBarry Smith mat->ops = &MatOps; 7578a729477SBarry Smith mat->destroy = MatiAIJdestroy; 7581eb62cbbSBarry Smith mat->view = MatiView; 7591eb62cbbSBarry Smith mat->type = MATAIJMPI; 7608a729477SBarry Smith mat->factor = 0; 7618a729477SBarry Smith mat->row = 0; 7628a729477SBarry Smith mat->col = 0; 763*d6dfbf8fSBarry Smith mat->outofrange = 0; 764*d6dfbf8fSBarry Smith mat->Mlow = 0; 765*d6dfbf8fSBarry Smith mat->Mhigh = M; 766*d6dfbf8fSBarry Smith mat->Nlow = 0; 767*d6dfbf8fSBarry Smith mat->Nhigh = N; 768*d6dfbf8fSBarry Smith 769*d6dfbf8fSBarry Smith 770*d6dfbf8fSBarry Smith mat->comm = comm; 7711eb62cbbSBarry Smith aij->insertmode = NotSetValues; 7721eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 7731eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 7741eb62cbbSBarry Smith 7751eb62cbbSBarry Smith if (M == -1 || N == -1) { 7761eb62cbbSBarry Smith work[0] = m; work[1] = n; 777*d6dfbf8fSBarry Smith MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 7781eb62cbbSBarry Smith if (M == -1) M = sum[0]; 7791eb62cbbSBarry Smith if (N == -1) N = sum[1]; 7801eb62cbbSBarry Smith } 7811eb62cbbSBarry Smith if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 7821eb62cbbSBarry Smith if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 7838a729477SBarry Smith aij->m = m; 7848a729477SBarry Smith aij->n = n; 7851eb62cbbSBarry Smith aij->N = N; 7861eb62cbbSBarry Smith aij->M = M; 7871eb62cbbSBarry Smith 7881eb62cbbSBarry Smith /* build local table of row and column ownerships */ 7891eb62cbbSBarry Smith aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 7901eb62cbbSBarry Smith CHKPTR(aij->rowners); 7911eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 7921eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 7931eb62cbbSBarry Smith aij->rowners[0] = 0; 7941eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 7951eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 7968a729477SBarry Smith } 7971eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 7981eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 7991eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 8001eb62cbbSBarry Smith aij->cowners[0] = 0; 8011eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 8021eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 8038a729477SBarry Smith } 8041eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 8051eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 8068a729477SBarry Smith 8078a729477SBarry Smith 8081eb62cbbSBarry Smith ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 8091eb62cbbSBarry Smith ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 8108a729477SBarry Smith 8111eb62cbbSBarry Smith /* build cache for off array entries formed */ 8121eb62cbbSBarry Smith aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 8131eb62cbbSBarry Smith aij->stash.n = 0; 8141eb62cbbSBarry Smith aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 8151eb62cbbSBarry Smith sizeof(Scalar))); CHKPTR(aij->stash.array); 8161eb62cbbSBarry Smith aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 8171eb62cbbSBarry Smith aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 8188a729477SBarry Smith 8191eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 8201eb62cbbSBarry Smith aij->lvec = 0; 8211eb62cbbSBarry Smith aij->Mvctx = 0; 822*d6dfbf8fSBarry Smith aij->assembled = 0; 8238a729477SBarry Smith 824*d6dfbf8fSBarry Smith *newmat = mat; 825*d6dfbf8fSBarry Smith return 0; 826*d6dfbf8fSBarry Smith } 827c74985f6SBarry Smith 828*d6dfbf8fSBarry Smith static int MatiCopy(Mat matin,Mat *newmat) 829*d6dfbf8fSBarry Smith { 830*d6dfbf8fSBarry Smith Mat mat; 831*d6dfbf8fSBarry Smith Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data; 832*d6dfbf8fSBarry Smith int i,rl,len, m = oldmat->m, n = oldmat->n,ierr; 833*d6dfbf8fSBarry Smith *newmat = 0; 834*d6dfbf8fSBarry Smith 835*d6dfbf8fSBarry Smith if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 836*d6dfbf8fSBarry Smith CREATEHEADER(mat,_Mat); 837*d6dfbf8fSBarry Smith mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 838*d6dfbf8fSBarry Smith mat->cookie = MAT_COOKIE; 839*d6dfbf8fSBarry Smith mat->ops = &MatOps; 840*d6dfbf8fSBarry Smith mat->destroy = MatiAIJdestroy; 841*d6dfbf8fSBarry Smith mat->view = MatiView; 842*d6dfbf8fSBarry Smith mat->type = MATAIJSEQ; 843*d6dfbf8fSBarry Smith mat->factor = matin->factor; 844*d6dfbf8fSBarry Smith mat->row = 0; 845*d6dfbf8fSBarry Smith mat->col = 0; 846*d6dfbf8fSBarry Smith mat->outofrange = matin->outofrange; 847*d6dfbf8fSBarry Smith mat->Mlow = matin->Mlow; 848*d6dfbf8fSBarry Smith mat->Mhigh = matin->Mhigh; 849*d6dfbf8fSBarry Smith mat->Nlow = matin->Nlow; 850*d6dfbf8fSBarry Smith mat->Nhigh = matin->Nhigh; 851*d6dfbf8fSBarry Smith 852*d6dfbf8fSBarry Smith aij->m = oldmat->m; 853*d6dfbf8fSBarry Smith aij->n = oldmat->n; 854*d6dfbf8fSBarry Smith aij->M = oldmat->M; 855*d6dfbf8fSBarry Smith aij->N = oldmat->N; 856*d6dfbf8fSBarry Smith 857*d6dfbf8fSBarry Smith aij->assembled = 1; 858*d6dfbf8fSBarry Smith aij->rstart = oldmat->rstart; 859*d6dfbf8fSBarry Smith aij->rend = oldmat->rend; 860*d6dfbf8fSBarry Smith aij->cstart = oldmat->cstart; 861*d6dfbf8fSBarry Smith aij->cend = oldmat->cend; 862*d6dfbf8fSBarry Smith aij->numtids = oldmat->numtids; 863*d6dfbf8fSBarry Smith aij->mytid = oldmat->mytid; 864*d6dfbf8fSBarry Smith aij->insertmode = NotSetValues; 865*d6dfbf8fSBarry Smith 866*d6dfbf8fSBarry Smith aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 867*d6dfbf8fSBarry Smith CHKPTR(aij->rowners); 868*d6dfbf8fSBarry Smith MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 869*d6dfbf8fSBarry Smith aij->stash.nmax = 0; 870*d6dfbf8fSBarry Smith aij->stash.n = 0; 871*d6dfbf8fSBarry Smith aij->stash.array= 0; 872*d6dfbf8fSBarry Smith mat->comm = matin->comm; 873*d6dfbf8fSBarry Smith 874*d6dfbf8fSBarry Smith ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 875*d6dfbf8fSBarry Smith ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 876*d6dfbf8fSBarry Smith ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 877*d6dfbf8fSBarry Smith ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 8788a729477SBarry Smith *newmat = mat; 8798a729477SBarry Smith return 0; 8808a729477SBarry Smith } 881