18a729477SBarry Smith 21eb62cbbSBarry Smith #include "mpiaij.h" 38a729477SBarry Smith #include "vec/vecimpl.h" 4d6dfbf8fSBarry Smith #include "inline/spops.h" 58a729477SBarry Smith 61eb62cbbSBarry Smith #define CHUNCKSIZE 100 71eb62cbbSBarry Smith /* 81eb62cbbSBarry Smith This is a simple minded stash. Do a linear search to determine if 91eb62cbbSBarry Smith in stash, if not add to end. 101eb62cbbSBarry Smith */ 111eb62cbbSBarry Smith static int StashValues(Stash *stash,int row,int n, int *idxn, 121eb62cbbSBarry Smith Scalar *values,InsertMode addv) 138a729477SBarry Smith { 141eb62cbbSBarry Smith int i,j,N = stash->n,found,*n_idx, *n_idy; 151eb62cbbSBarry Smith Scalar val,*n_array; 168a729477SBarry Smith 171eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 181eb62cbbSBarry Smith found = 0; 191eb62cbbSBarry Smith val = *values++; 208a729477SBarry Smith for ( j=0; j<N; j++ ) { 211eb62cbbSBarry Smith if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 221eb62cbbSBarry Smith /* found a match */ 231eb62cbbSBarry Smith if (addv == AddValues) stash->array[j] += val; 241eb62cbbSBarry Smith else stash->array[j] = val; 251eb62cbbSBarry Smith found = 1; 268a729477SBarry Smith break; 278a729477SBarry Smith } 288a729477SBarry Smith } 291eb62cbbSBarry Smith if (!found) { /* not found so add to end */ 301eb62cbbSBarry Smith if ( stash->n == stash->nmax ) { 311eb62cbbSBarry Smith /* allocate a larger stash */ 321eb62cbbSBarry Smith n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 331eb62cbbSBarry Smith 2*sizeof(int) + sizeof(Scalar))); 341eb62cbbSBarry Smith CHKPTR(n_array); 351eb62cbbSBarry Smith n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 361eb62cbbSBarry Smith n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 371eb62cbbSBarry Smith MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 381eb62cbbSBarry Smith MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 391eb62cbbSBarry Smith MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 401eb62cbbSBarry Smith if (stash->array) FREE(stash->array); 411eb62cbbSBarry Smith stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 421eb62cbbSBarry Smith stash->nmax += CHUNCKSIZE; 431eb62cbbSBarry Smith } 441eb62cbbSBarry Smith stash->array[stash->n] = val; 451eb62cbbSBarry Smith stash->idx[stash->n] = row; 461eb62cbbSBarry Smith stash->idy[stash->n++] = idxn[i]; 471eb62cbbSBarry Smith } 488a729477SBarry Smith } 498a729477SBarry Smith return 0; 508a729477SBarry Smith } 518a729477SBarry Smith 521eb62cbbSBarry Smith static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 531eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 548a729477SBarry Smith { 551eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 561eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 571eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 588a729477SBarry Smith 591eb62cbbSBarry Smith if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 601eb62cbbSBarry Smith SETERR(1,"You cannot mix inserts and adds"); 618a729477SBarry Smith } 621eb62cbbSBarry Smith aij->insertmode = addv; 638a729477SBarry Smith for ( i=0; i<m; i++ ) { 64*da3a660dSBarry Smith if (idxm[i] < 0) SETERR(1,"Negative row index"); 65*da3a660dSBarry Smith if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 661eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 671eb62cbbSBarry Smith row = idxm[i] - rstart; 681eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 69*da3a660dSBarry Smith if (idxn[j] < 0) SETERR(1,"Negative column index"); 70*da3a660dSBarry Smith if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 711eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 721eb62cbbSBarry Smith col = idxn[j] - cstart; 731eb62cbbSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 741eb62cbbSBarry Smith } 751eb62cbbSBarry Smith else { 76d6dfbf8fSBarry Smith if (aij->assembled) { 77d6dfbf8fSBarry Smith SETERR(1,"Cannot insert off diagonal block in already\ 78d6dfbf8fSBarry Smith assembled matrix. Contact petsc-maint@mcs.anl.gov\ 79d6dfbf8fSBarry Smith if your need this feature"); 80d6dfbf8fSBarry Smith } 811eb62cbbSBarry Smith col = idxn[j]; 821eb62cbbSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 831eb62cbbSBarry Smith } 841eb62cbbSBarry Smith } 851eb62cbbSBarry Smith } 861eb62cbbSBarry Smith else { 871eb62cbbSBarry Smith ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 881eb62cbbSBarry Smith } 898a729477SBarry Smith } 908a729477SBarry Smith return 0; 918a729477SBarry Smith } 928a729477SBarry Smith 938a729477SBarry Smith /* 941eb62cbbSBarry Smith the assembly code is alot like the code for vectors, we should 951eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 961eb62cbbSBarry Smith either case. 978a729477SBarry Smith */ 988a729477SBarry Smith 991eb62cbbSBarry Smith static int MatiAIJBeginAssemble(Mat mat) 1008a729477SBarry Smith { 1011eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 102d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 1031eb62cbbSBarry Smith int ierr, numtids = aij->numtids, *owners = aij->rowners; 1041eb62cbbSBarry Smith int mytid = aij->mytid; 1051eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1061eb62cbbSBarry Smith int *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work; 1071eb62cbbSBarry Smith int tag = 50, *owner,*starts,count; 1081eb62cbbSBarry Smith InsertMode addv; 1091eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1101eb62cbbSBarry Smith 1111eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 1121eb62cbbSBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT, 1131eb62cbbSBarry Smith MPI_BOR,comm); 1141eb62cbbSBarry Smith if (addv == (AddValues|InsertValues)) { 1151eb62cbbSBarry Smith SETERR(1,"Some processors have inserted while others have added"); 1161eb62cbbSBarry Smith } 1171eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1181eb62cbbSBarry Smith 1191eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1201eb62cbbSBarry Smith nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 1211eb62cbbSBarry Smith MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 1221eb62cbbSBarry Smith owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 1231eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1241eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1251eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1261eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1271eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1288a729477SBarry Smith } 1298a729477SBarry Smith } 1308a729477SBarry Smith } 1311eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1321eb62cbbSBarry Smith 1331eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1341eb62cbbSBarry Smith work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 1351eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1361eb62cbbSBarry Smith nreceives = work[mytid]; 1371eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1381eb62cbbSBarry Smith nmax = work[mytid]; 1391eb62cbbSBarry Smith FREE(work); 1401eb62cbbSBarry Smith 1411eb62cbbSBarry Smith /* post receives: 1421eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1431eb62cbbSBarry Smith (global index,value) we store the global index as a double 144d6dfbf8fSBarry Smith to simplify the message passing. 1451eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1461eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1471eb62cbbSBarry Smith this is a lot of wasted space. 1481eb62cbbSBarry Smith 1491eb62cbbSBarry Smith 1501eb62cbbSBarry Smith This could be done better. 1511eb62cbbSBarry Smith */ 1521eb62cbbSBarry Smith rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar)); 1531eb62cbbSBarry Smith CHKPTR(rvalues); 1541eb62cbbSBarry Smith recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 1551eb62cbbSBarry Smith CHKPTR(recv_waits); 1561eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 1571eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 1581eb62cbbSBarry Smith comm,recv_waits+i); 1591eb62cbbSBarry Smith } 1601eb62cbbSBarry Smith 1611eb62cbbSBarry Smith /* do sends: 1621eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1631eb62cbbSBarry Smith the ith processor 1641eb62cbbSBarry Smith */ 1651eb62cbbSBarry Smith svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 1661eb62cbbSBarry Smith CHKPTR(svalues); 1671eb62cbbSBarry Smith send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 1681eb62cbbSBarry Smith CHKPTR(send_waits); 1691eb62cbbSBarry Smith starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 1701eb62cbbSBarry Smith starts[0] = 0; 1711eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1721eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1731eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1741eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1751eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1761eb62cbbSBarry Smith } 1771eb62cbbSBarry Smith FREE(owner); 1781eb62cbbSBarry Smith starts[0] = 0; 1791eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1801eb62cbbSBarry Smith count = 0; 1811eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1821eb62cbbSBarry Smith if (procs[i]) { 1831eb62cbbSBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 1841eb62cbbSBarry Smith comm,send_waits+count++); 1851eb62cbbSBarry Smith } 1861eb62cbbSBarry Smith } 1871eb62cbbSBarry Smith FREE(starts); FREE(nprocs); 1881eb62cbbSBarry Smith 1891eb62cbbSBarry Smith /* Free cache space */ 1901eb62cbbSBarry Smith aij->stash.nmax = aij->stash.n = 0; 1911eb62cbbSBarry Smith if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 1921eb62cbbSBarry Smith 1931eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1941eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1951eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1961eb62cbbSBarry Smith aij->rmax = nmax; 1971eb62cbbSBarry Smith 1981eb62cbbSBarry Smith return 0; 1991eb62cbbSBarry Smith } 2001eb62cbbSBarry Smith extern int MPIAIJSetUpMultiply(Mat); 2011eb62cbbSBarry Smith 2021eb62cbbSBarry Smith static int MatiAIJEndAssemble(Mat mat) 2031eb62cbbSBarry Smith { 2041eb62cbbSBarry Smith int ierr; 2051eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 2061eb62cbbSBarry Smith 2071eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 208d6dfbf8fSBarry Smith int imdex,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n; 2091eb62cbbSBarry Smith int row,col; 2101eb62cbbSBarry Smith Scalar *values,val; 2111eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2121eb62cbbSBarry Smith 2131eb62cbbSBarry Smith /* wait on receives */ 2141eb62cbbSBarry Smith while (count) { 215d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2161eb62cbbSBarry Smith /* unpack receives into our local space */ 217d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 2181eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_SCALAR,&n); 2191eb62cbbSBarry Smith n = n/3; 2201eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2211eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2221eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2231eb62cbbSBarry Smith val = values[3*i+2]; 2241eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2251eb62cbbSBarry Smith col -= aij->cstart; 2261eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2271eb62cbbSBarry Smith } 2281eb62cbbSBarry Smith else { 229d6dfbf8fSBarry Smith if (aij->assembled) { 230d6dfbf8fSBarry Smith SETERR(1,"Cannot insert off diagonal block in already\ 231d6dfbf8fSBarry Smith assembled matrix. Contact petsc-maint@mcs.anl.gov\ 232d6dfbf8fSBarry Smith if your need this feature"); 233d6dfbf8fSBarry Smith } 2341eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2351eb62cbbSBarry Smith } 2361eb62cbbSBarry Smith } 2371eb62cbbSBarry Smith count--; 2381eb62cbbSBarry Smith } 2391eb62cbbSBarry Smith FREE(aij->recv_waits); FREE(aij->rvalues); 2401eb62cbbSBarry Smith 2411eb62cbbSBarry Smith /* wait on sends */ 2421eb62cbbSBarry Smith if (aij->nsends) { 2431eb62cbbSBarry Smith send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 2441eb62cbbSBarry Smith CHKPTR(send_status); 2451eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2461eb62cbbSBarry Smith FREE(send_status); 2471eb62cbbSBarry Smith } 2481eb62cbbSBarry Smith FREE(aij->send_waits); FREE(aij->svalues); 2491eb62cbbSBarry Smith 2501eb62cbbSBarry Smith aij->insertmode = NotSetValues; 2511eb62cbbSBarry Smith ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 2521eb62cbbSBarry Smith ierr = MatEndAssembly(aij->A); CHKERR(ierr); 2531eb62cbbSBarry Smith 254d6dfbf8fSBarry Smith 255d6dfbf8fSBarry Smith if (aij->assembled) { 256d6dfbf8fSBarry Smith /* this is because the present implimentation doesn't support 257d6dfbf8fSBarry Smith inserting values off the diagonal block.*/ 258d6dfbf8fSBarry Smith return 0; 259d6dfbf8fSBarry Smith } 260d6dfbf8fSBarry Smith 2611eb62cbbSBarry Smith ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 2621eb62cbbSBarry Smith ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 2631eb62cbbSBarry Smith ierr = MatEndAssembly(aij->B); CHKERR(ierr); 264d6dfbf8fSBarry Smith 265d6dfbf8fSBarry Smith aij->assembled = 1; 2668a729477SBarry Smith return 0; 2678a729477SBarry Smith } 2688a729477SBarry Smith 2691eb62cbbSBarry Smith static int MatiZero(Mat A) 2701eb62cbbSBarry Smith { 2711eb62cbbSBarry Smith Matimpiaij *l = (Matimpiaij *) A->data; 2721eb62cbbSBarry Smith 2731eb62cbbSBarry Smith MatZeroEntries(l->A); MatZeroEntries(l->B); 2741eb62cbbSBarry Smith return 0; 2751eb62cbbSBarry Smith } 2761eb62cbbSBarry Smith 2771eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2781eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2791eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2801eb62cbbSBarry Smith 2811eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2821eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2831eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2841eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2851eb62cbbSBarry Smith routine. 2861eb62cbbSBarry Smith */ 2871eb62cbbSBarry Smith static int MatiZerorows(Mat A,IS is,Scalar *diag) 2881eb62cbbSBarry Smith { 2891eb62cbbSBarry Smith Matimpiaij *l = (Matimpiaij *) A->data; 2901eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2911eb62cbbSBarry Smith int *localrows,*procs,*nprocs,j,found,idx,nsends,*work; 2921eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2931eb62cbbSBarry Smith int *rvalues,tag = 67,count,base,slen,n,len,*source; 294d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 295d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2961eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2971eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2981eb62cbbSBarry Smith IS istmp; 2991eb62cbbSBarry Smith 300*da3a660dSBarry Smith if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first"); 3011eb62cbbSBarry Smith ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 3021eb62cbbSBarry Smith ierr = ISGetIndices(is,&rows); CHKERR(ierr); 3031eb62cbbSBarry Smith 3041eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3051eb62cbbSBarry Smith nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 3061eb62cbbSBarry Smith MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 3071eb62cbbSBarry Smith owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 3081eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3091eb62cbbSBarry Smith idx = rows[i]; 3101eb62cbbSBarry Smith found = 0; 3111eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 3121eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3131eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3141eb62cbbSBarry Smith } 3151eb62cbbSBarry Smith } 316d6dfbf8fSBarry Smith if (!found) SETERR(1,"Imdex out of range"); 3171eb62cbbSBarry Smith } 3181eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3191eb62cbbSBarry Smith 3201eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3211eb62cbbSBarry Smith work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 3221eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3231eb62cbbSBarry Smith nrecvs = work[mytid]; 3241eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3251eb62cbbSBarry Smith nmax = work[mytid]; 3261eb62cbbSBarry Smith FREE(work); 3271eb62cbbSBarry Smith 3281eb62cbbSBarry Smith /* post receives: */ 3291eb62cbbSBarry Smith rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */ 3301eb62cbbSBarry Smith CHKPTR(rvalues); 3311eb62cbbSBarry Smith recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 3321eb62cbbSBarry Smith CHKPTR(recv_waits); 3331eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3341eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3351eb62cbbSBarry Smith comm,recv_waits+i); 3361eb62cbbSBarry Smith } 3371eb62cbbSBarry Smith 3381eb62cbbSBarry Smith /* do sends: 3391eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3401eb62cbbSBarry Smith the ith processor 3411eb62cbbSBarry Smith */ 3421eb62cbbSBarry Smith svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 3431eb62cbbSBarry Smith send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 3441eb62cbbSBarry Smith CHKPTR(send_waits); 3451eb62cbbSBarry Smith starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 3461eb62cbbSBarry Smith starts[0] = 0; 3471eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3481eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3491eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3501eb62cbbSBarry Smith } 3511eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith starts[0] = 0; 3541eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3551eb62cbbSBarry Smith count = 0; 3561eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3571eb62cbbSBarry Smith if (procs[i]) { 3581eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3591eb62cbbSBarry Smith comm,send_waits+count++); 3601eb62cbbSBarry Smith } 3611eb62cbbSBarry Smith } 3621eb62cbbSBarry Smith FREE(starts); 3631eb62cbbSBarry Smith 3641eb62cbbSBarry Smith base = owners[mytid]; 3651eb62cbbSBarry Smith 3661eb62cbbSBarry Smith /* wait on receives */ 3671eb62cbbSBarry Smith lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 3681eb62cbbSBarry Smith source = lens + nrecvs; 3691eb62cbbSBarry Smith count = nrecvs; slen = 0; 3701eb62cbbSBarry Smith while (count) { 371d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3721eb62cbbSBarry Smith /* unpack receives into our local space */ 3731eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 374d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 375d6dfbf8fSBarry Smith lens[imdex] = n; 3761eb62cbbSBarry Smith slen += n; 3771eb62cbbSBarry Smith count--; 3781eb62cbbSBarry Smith } 3791eb62cbbSBarry Smith FREE(recv_waits); 3801eb62cbbSBarry Smith 3811eb62cbbSBarry Smith /* move the data into the send scatter */ 3821eb62cbbSBarry Smith lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 3831eb62cbbSBarry Smith count = 0; 3841eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3851eb62cbbSBarry Smith values = rvalues + i*nmax; 3861eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3871eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3881eb62cbbSBarry Smith } 3891eb62cbbSBarry Smith } 3901eb62cbbSBarry Smith FREE(rvalues); FREE(lens); 3911eb62cbbSBarry Smith FREE(owner); FREE(nprocs); 3921eb62cbbSBarry Smith 3931eb62cbbSBarry Smith /* actually zap the local rows */ 3941eb62cbbSBarry Smith ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 3951eb62cbbSBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 3961eb62cbbSBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 3971eb62cbbSBarry Smith ierr = ISDestroy(istmp); CHKERR(ierr); 3981eb62cbbSBarry Smith 3991eb62cbbSBarry Smith /* wait on sends */ 4001eb62cbbSBarry Smith if (nsends) { 4011eb62cbbSBarry Smith send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 4021eb62cbbSBarry Smith CHKPTR(send_status); 4031eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4041eb62cbbSBarry Smith FREE(send_status); 4051eb62cbbSBarry Smith } 4061eb62cbbSBarry Smith FREE(send_waits); FREE(svalues); 4071eb62cbbSBarry Smith 4081eb62cbbSBarry Smith 4091eb62cbbSBarry Smith return 0; 4101eb62cbbSBarry Smith } 4111eb62cbbSBarry Smith 4121eb62cbbSBarry Smith static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 4131eb62cbbSBarry Smith { 4141eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 4151eb62cbbSBarry Smith int ierr; 416*da3a660dSBarry Smith if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 417d6dfbf8fSBarry Smith ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 4181eb62cbbSBarry Smith CHKERR(ierr); 4191eb62cbbSBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 420d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 421d6dfbf8fSBarry Smith CHKERR(ierr); 4221eb62cbbSBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 4231eb62cbbSBarry Smith return 0; 4241eb62cbbSBarry Smith } 4251eb62cbbSBarry Smith 426*da3a660dSBarry Smith static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz) 427*da3a660dSBarry Smith { 428*da3a660dSBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 429*da3a660dSBarry Smith int ierr; 430*da3a660dSBarry Smith if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 431*da3a660dSBarry Smith ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 432*da3a660dSBarry Smith CHKERR(ierr); 433*da3a660dSBarry Smith ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr); 434*da3a660dSBarry Smith ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 435*da3a660dSBarry Smith CHKERR(ierr); 436*da3a660dSBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr); 437*da3a660dSBarry Smith return 0; 438*da3a660dSBarry Smith } 439*da3a660dSBarry Smith 440*da3a660dSBarry Smith static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy) 441*da3a660dSBarry Smith { 442*da3a660dSBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 443*da3a660dSBarry Smith int ierr; 444*da3a660dSBarry Smith 445*da3a660dSBarry Smith if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 446*da3a660dSBarry Smith /* do nondiagonal part */ 447*da3a660dSBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 448*da3a660dSBarry Smith /* send it on its way */ 449*da3a660dSBarry Smith ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues, 450*da3a660dSBarry Smith ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 451*da3a660dSBarry Smith /* do local part */ 452*da3a660dSBarry Smith ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr); 453*da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 454*da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 455*da3a660dSBarry Smith /* but is not perhaps always true. */ 456*da3a660dSBarry Smith ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse, 457*da3a660dSBarry Smith aij->Mvctx); CHKERR(ierr); 458*da3a660dSBarry Smith return 0; 459*da3a660dSBarry Smith } 460*da3a660dSBarry Smith 461*da3a660dSBarry Smith static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz) 462*da3a660dSBarry Smith { 463*da3a660dSBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 464*da3a660dSBarry Smith int ierr; 465*da3a660dSBarry Smith 466*da3a660dSBarry Smith if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 467*da3a660dSBarry Smith /* do nondiagonal part */ 468*da3a660dSBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 469*da3a660dSBarry Smith /* send it on its way */ 470*da3a660dSBarry Smith ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues, 471*da3a660dSBarry Smith ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 472*da3a660dSBarry Smith /* do local part */ 473*da3a660dSBarry Smith ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr); 474*da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 475*da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 476*da3a660dSBarry Smith /* but is not perhaps always true. */ 477*da3a660dSBarry Smith ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse, 478*da3a660dSBarry Smith aij->Mvctx); CHKERR(ierr); 479*da3a660dSBarry Smith return 0; 480*da3a660dSBarry Smith } 481*da3a660dSBarry Smith 4821eb62cbbSBarry Smith /* 4831eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4841eb62cbbSBarry Smith diagonal block 4851eb62cbbSBarry Smith */ 4861eb62cbbSBarry Smith static int MatiAIJgetdiag(Mat Ain,Vec v) 4871eb62cbbSBarry Smith { 4881eb62cbbSBarry Smith Matimpiaij *A = (Matimpiaij *) Ain->data; 489*da3a660dSBarry Smith if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first"); 4901eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4911eb62cbbSBarry Smith } 4921eb62cbbSBarry Smith 4931eb62cbbSBarry Smith static int MatiAIJdestroy(PetscObject obj) 4941eb62cbbSBarry Smith { 4951eb62cbbSBarry Smith Mat mat = (Mat) obj; 4961eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 4971eb62cbbSBarry Smith int ierr; 4981eb62cbbSBarry Smith FREE(aij->rowners); 4991eb62cbbSBarry Smith ierr = MatDestroy(aij->A); CHKERR(ierr); 5001eb62cbbSBarry Smith ierr = MatDestroy(aij->B); CHKERR(ierr); 5011eb62cbbSBarry Smith FREE(aij); FREE(mat); 5021eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 5031eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 5041eb62cbbSBarry Smith return 0; 5051eb62cbbSBarry Smith } 5061eb62cbbSBarry Smith 5071eb62cbbSBarry Smith static int MatiView(PetscObject obj,Viewer viewer) 5081eb62cbbSBarry Smith { 5091eb62cbbSBarry Smith Mat mat = (Mat) obj; 5101eb62cbbSBarry Smith Matimpiaij *aij = (Matimpiaij *) mat->data; 5111eb62cbbSBarry Smith int ierr; 5121eb62cbbSBarry Smith 513*da3a660dSBarry Smith if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 514d6dfbf8fSBarry Smith MPE_Seq_begin(mat->comm,1); 5151eb62cbbSBarry Smith printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 5161eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5171eb62cbbSBarry Smith aij->cend); 5181eb62cbbSBarry Smith ierr = MatView(aij->A,viewer); CHKERR(ierr); 5191eb62cbbSBarry Smith ierr = MatView(aij->B,viewer); CHKERR(ierr); 520d6dfbf8fSBarry Smith MPE_Seq_end(mat->comm,1); 5211eb62cbbSBarry Smith return 0; 5221eb62cbbSBarry Smith } 5231eb62cbbSBarry Smith 524d6dfbf8fSBarry Smith extern int MatiAIJmarkdiag(Matiaij *); 5251eb62cbbSBarry Smith /* 5261eb62cbbSBarry Smith This has to provide several versions. 5271eb62cbbSBarry Smith 5281eb62cbbSBarry Smith 1) per sequential 5291eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 5301eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 531d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 5321eb62cbbSBarry Smith */ 533d6dfbf8fSBarry Smith static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift, 5348a729477SBarry Smith int its,Vec xx) 5358a729477SBarry Smith { 5361eb62cbbSBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 537d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 538d6dfbf8fSBarry Smith Matiaij *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data; 539*da3a660dSBarry Smith Scalar zero = 0.0,*b,*x,*w,*xs,*ls,d,*v,sum,scale,*t,*ts; 540d6dfbf8fSBarry Smith int ierr,one = 1, tmp, *idx, *diag,mytid; 5418a729477SBarry Smith int n = mat->n, m = mat->m, i, j; 542*da3a660dSBarry Smith Vec tt; 5438a729477SBarry Smith 544*da3a660dSBarry Smith if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first"); 5451eb62cbbSBarry Smith 546d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 547d6dfbf8fSBarry Smith xs = x -1; /* shift by one for index start of 1 */ 548*da3a660dSBarry Smith ls--; 549d6dfbf8fSBarry Smith if (!A->diag) {if (ierr = MatiAIJmarkdiag(A)) return ierr;} 550d6dfbf8fSBarry Smith diag = A->diag; 551*da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 552*da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 553*da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 554*da3a660dSBarry Smith 555*da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 556*da3a660dSBarry Smith 557*da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 558*da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 559*da3a660dSBarry Smith is the relaxation factor. 560*da3a660dSBarry Smith */ 561*da3a660dSBarry Smith ierr = VecCreate(xx,&tt); CHKERR(ierr); 562*da3a660dSBarry Smith VecGetArray(tt,&t); 563*da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 564*da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 565*da3a660dSBarry Smith VecSet(&zero,mat->lvec); 566*da3a660dSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 567*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 568*da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 569*da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 570*da3a660dSBarry Smith idx = A->j + diag[i]; 571*da3a660dSBarry Smith v = A->a + diag[i]; 572*da3a660dSBarry Smith sum = b[i]; 573*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 574*da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 575*da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 576*da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 577*da3a660dSBarry Smith v = B->a + B->i[i] - 1; 578*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 579*da3a660dSBarry Smith x[i] = omega*(sum/d); 580*da3a660dSBarry Smith } 581*da3a660dSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 582*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 583*da3a660dSBarry Smith 584*da3a660dSBarry Smith /* t = b - (2*E - D)x */ 585*da3a660dSBarry Smith v = A->a; 586*da3a660dSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 587*da3a660dSBarry Smith 588*da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 589*da3a660dSBarry Smith ts = t - 1; /* shifted by one for index start of a or mat->j*/ 590*da3a660dSBarry Smith diag = A->diag; 591*da3a660dSBarry Smith VecSet(&zero,mat->lvec); 592*da3a660dSBarry Smith ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown, 593*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 594*da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 595*da3a660dSBarry Smith n = diag[i] - A->i[i]; 596*da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 597*da3a660dSBarry Smith v = A->a + A->i[i] - 1; 598*da3a660dSBarry Smith sum = t[i]; 599*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 600*da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 601*da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 602*da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 603*da3a660dSBarry Smith v = B->a + B->i[i] - 1; 604*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 605*da3a660dSBarry Smith t[i] = omega*(sum/d); 606*da3a660dSBarry Smith } 607*da3a660dSBarry Smith ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown, 608*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 609*da3a660dSBarry Smith /* x = x + t */ 610*da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 611*da3a660dSBarry Smith VecDestroy(tt); 612*da3a660dSBarry Smith return 0; 613*da3a660dSBarry Smith } 614*da3a660dSBarry Smith 6151eb62cbbSBarry Smith 616d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 617*da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 618*da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 619*da3a660dSBarry Smith } 620*da3a660dSBarry Smith else { 621d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 622d6dfbf8fSBarry Smith CHKERR(ierr); 623d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 624d6dfbf8fSBarry Smith CHKERR(ierr); 625*da3a660dSBarry Smith } 626d6dfbf8fSBarry Smith while (its--) { 627d6dfbf8fSBarry Smith /* go down through the rows */ 628d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 629d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 630d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 631d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 632d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 633d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 634d6dfbf8fSBarry Smith sum = b[i]; 635d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 636d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 637d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 638d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 639d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 640d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 641d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 642d6dfbf8fSBarry Smith } 643d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 644d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 645d6dfbf8fSBarry Smith /* come up through the rows */ 646d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 647d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 648d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 649d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 650d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 651d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 652d6dfbf8fSBarry Smith sum = b[i]; 653d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 654d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 655d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 656d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 657d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 658d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 659d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 660d6dfbf8fSBarry Smith } 661d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 662d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 663d6dfbf8fSBarry Smith } 664d6dfbf8fSBarry Smith } 665d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 666*da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 667*da3a660dSBarry Smith VecSet(&zero,mat->lvec); 668*da3a660dSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 669*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 670*da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 671*da3a660dSBarry Smith n = diag[i] - A->i[i]; 672*da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 673*da3a660dSBarry Smith v = A->a + A->i[i] - 1; 674*da3a660dSBarry Smith sum = b[i]; 675*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 676*da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 677*da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 678*da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 679*da3a660dSBarry Smith v = B->a + B->i[i] - 1; 680*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 681*da3a660dSBarry Smith x[i] = omega*(sum/d); 682*da3a660dSBarry Smith } 683*da3a660dSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 684*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 685*da3a660dSBarry Smith its--; 686*da3a660dSBarry Smith } 687d6dfbf8fSBarry Smith while (its--) { 688d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 689d6dfbf8fSBarry Smith CHKERR(ierr); 690d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 691d6dfbf8fSBarry Smith CHKERR(ierr); 692d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 693d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 694d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 695d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 696d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 697d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 698d6dfbf8fSBarry Smith sum = b[i]; 699d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 700d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 701d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 702d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 703d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 704d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 705d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 706d6dfbf8fSBarry Smith } 707d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 708d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 709d6dfbf8fSBarry Smith } 710d6dfbf8fSBarry Smith } 711d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 712*da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 713*da3a660dSBarry Smith VecSet(&zero,mat->lvec); 714*da3a660dSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 715*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 716*da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 717*da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 718*da3a660dSBarry Smith idx = A->j + diag[i]; 719*da3a660dSBarry Smith v = A->a + diag[i]; 720*da3a660dSBarry Smith sum = b[i]; 721*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 722*da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 723*da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 724*da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 725*da3a660dSBarry Smith v = B->a + B->i[i] - 1; 726*da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 727*da3a660dSBarry Smith x[i] = omega*(sum/d); 728*da3a660dSBarry Smith } 729*da3a660dSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 730*da3a660dSBarry Smith mat->Mvctx); CHKERR(ierr); 731*da3a660dSBarry Smith its--; 732*da3a660dSBarry Smith } 733d6dfbf8fSBarry Smith while (its--) { 734d6dfbf8fSBarry Smith ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 735d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 736d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 737d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 738d6dfbf8fSBarry Smith ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 739d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 740d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 741d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 742d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 743d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 744d6dfbf8fSBarry Smith sum = b[i]; 745d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 746d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 747d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 748d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 749d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 750d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 751d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 752d6dfbf8fSBarry Smith } 753d6dfbf8fSBarry Smith ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 754d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 755d6dfbf8fSBarry Smith } 756d6dfbf8fSBarry Smith } 757d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 758*da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 759*da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 760*da3a660dSBarry Smith } 761d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 762d6dfbf8fSBarry Smith CHKERR(ierr); 763d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 764d6dfbf8fSBarry Smith CHKERR(ierr); 765d6dfbf8fSBarry Smith while (its--) { 766d6dfbf8fSBarry Smith /* go down through the rows */ 767d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 768d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 769d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 770d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 771d6dfbf8fSBarry Smith sum = b[i]; 772d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 773d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 774d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 775d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 776d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 777d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 778d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 779d6dfbf8fSBarry Smith } 780d6dfbf8fSBarry Smith /* come up through the rows */ 781d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 782d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 783d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 784d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 785d6dfbf8fSBarry Smith sum = b[i]; 786d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 787d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 788d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 789d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 790d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 791d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 792d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 793d6dfbf8fSBarry Smith } 794d6dfbf8fSBarry Smith } 795d6dfbf8fSBarry Smith } 796d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 797*da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 798*da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 799*da3a660dSBarry Smith } 800d6dfbf8fSBarry Smith ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 801d6dfbf8fSBarry Smith CHKERR(ierr); 802d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 803d6dfbf8fSBarry Smith CHKERR(ierr); 804d6dfbf8fSBarry Smith while (its--) { 805d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 806d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 807d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 808d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 809d6dfbf8fSBarry Smith sum = b[i]; 810d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 811d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 812d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 813d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 814d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 815d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 816d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 817d6dfbf8fSBarry Smith } 818d6dfbf8fSBarry Smith } 819d6dfbf8fSBarry Smith } 820d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 821*da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 822*da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 823*da3a660dSBarry Smith } 824d6dfbf8fSBarry Smith ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 825d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 826d6dfbf8fSBarry Smith ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 827d6dfbf8fSBarry Smith mat->Mvctx); CHKERR(ierr); 828d6dfbf8fSBarry Smith while (its--) { 829d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 830d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 831d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 832d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 833d6dfbf8fSBarry Smith sum = b[i]; 834d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 835d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 836d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 837d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 838d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 839d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 840d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 841d6dfbf8fSBarry Smith } 842d6dfbf8fSBarry Smith } 843d6dfbf8fSBarry Smith } 8448a729477SBarry Smith return 0; 8458a729477SBarry Smith } 846c74985f6SBarry Smith static int MatiAIJinsopt(Mat aijin,int op) 847c74985f6SBarry Smith { 848c74985f6SBarry Smith Matimpiaij *aij = (Matimpiaij *) aijin->data; 849c74985f6SBarry Smith 850c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 851c74985f6SBarry Smith MatSetOption(aij->A,op); 852c74985f6SBarry Smith MatSetOption(aij->B,op); 853c74985f6SBarry Smith } 854c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 855c74985f6SBarry Smith MatSetOption(aij->A,op); 856c74985f6SBarry Smith MatSetOption(aij->B,op); 857c74985f6SBarry Smith } 858c74985f6SBarry Smith else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 859c74985f6SBarry Smith return 0; 860c74985f6SBarry Smith } 861c74985f6SBarry Smith 862c74985f6SBarry Smith static int MatiAIJsize(Mat matin,int *m,int *n) 863c74985f6SBarry Smith { 864c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 865c74985f6SBarry Smith *m = mat->M; *n = mat->N; 866c74985f6SBarry Smith return 0; 867c74985f6SBarry Smith } 868c74985f6SBarry Smith 869c74985f6SBarry Smith static int MatiAIJlocalsize(Mat matin,int *m,int *n) 870c74985f6SBarry Smith { 871c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 872c74985f6SBarry Smith *m = mat->m; *n = mat->n; 873c74985f6SBarry Smith return 0; 874c74985f6SBarry Smith } 875c74985f6SBarry Smith 876c74985f6SBarry Smith static int MatiAIJrange(Mat matin,int *m,int *n) 877c74985f6SBarry Smith { 878c74985f6SBarry Smith Matimpiaij *mat = (Matimpiaij *) matin->data; 879c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 880c74985f6SBarry Smith return 0; 881c74985f6SBarry Smith } 882c74985f6SBarry Smith 883d6dfbf8fSBarry Smith static int MatiCopy(Mat,Mat *); 884d6dfbf8fSBarry Smith 8858a729477SBarry Smith /* -------------------------------------------------------------------*/ 8861eb62cbbSBarry Smith static struct _MatOps MatOps = {MatiAIJInsertValues, 8878a729477SBarry Smith 0, 0, 888*da3a660dSBarry Smith MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd, 8891eb62cbbSBarry Smith 0,0,0,0, 8908a729477SBarry Smith 0,0, 8918a729477SBarry Smith MatiAIJrelax, 8928a729477SBarry Smith 0, 8931eb62cbbSBarry Smith 0,0,0, 894d6dfbf8fSBarry Smith MatiCopy, 8958a729477SBarry Smith MatiAIJgetdiag,0,0, 8961eb62cbbSBarry Smith MatiAIJBeginAssemble,MatiAIJEndAssemble, 8971eb62cbbSBarry Smith 0, 898c74985f6SBarry Smith MatiAIJinsopt,MatiZero,MatiZerorows,0, 899c74985f6SBarry Smith 0,0,0,0, 900c74985f6SBarry Smith MatiAIJsize,MatiAIJlocalsize,MatiAIJrange }; 9018a729477SBarry Smith 9028a729477SBarry Smith 9038a729477SBarry Smith 9048a729477SBarry Smith /*@ 9058a729477SBarry Smith 9061eb62cbbSBarry Smith MatCreateMPIAIJ - Creates a sparse parallel matrix 9071eb62cbbSBarry Smith in AIJ format. 9088a729477SBarry Smith 9098a729477SBarry Smith Input Parameters: 9101eb62cbbSBarry Smith . comm - MPI communicator 9111eb62cbbSBarry Smith . m,n - number of local rows and columns (or -1 to have calculated) 9121eb62cbbSBarry Smith . M,N - global rows and columns (or -1 to have calculated) 9131eb62cbbSBarry Smith . d_nz - total number nonzeros in diagonal portion of matrix 9141eb62cbbSBarry Smith . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 9158a729477SBarry Smith . You must leave room for the diagonal entry even if it is zero. 9161eb62cbbSBarry Smith . o_nz - total number nonzeros in off-diagonal portion of matrix 9171eb62cbbSBarry Smith . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 9181eb62cbbSBarry Smith . or null. You must have at least one nonzero per row. 9198a729477SBarry Smith 9208a729477SBarry Smith Output parameters: 9218a729477SBarry Smith . newmat - the matrix 9228a729477SBarry Smith 9231eb62cbbSBarry Smith Keywords: matrix, aij, compressed row, sparse, parallel 9248a729477SBarry Smith @*/ 9251eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 9261eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 9278a729477SBarry Smith { 9288a729477SBarry Smith Mat mat; 9291eb62cbbSBarry Smith Matimpiaij *aij; 9301eb62cbbSBarry Smith int ierr, i,rl,len,sum[2],work[2]; 9318a729477SBarry Smith *newmat = 0; 9328a729477SBarry Smith CREATEHEADER(mat,_Mat); 9331eb62cbbSBarry Smith mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 9348a729477SBarry Smith mat->cookie = MAT_COOKIE; 9358a729477SBarry Smith mat->ops = &MatOps; 9368a729477SBarry Smith mat->destroy = MatiAIJdestroy; 9371eb62cbbSBarry Smith mat->view = MatiView; 9381eb62cbbSBarry Smith mat->type = MATAIJMPI; 9398a729477SBarry Smith mat->factor = 0; 9408a729477SBarry Smith mat->row = 0; 9418a729477SBarry Smith mat->col = 0; 942d6dfbf8fSBarry Smith 943d6dfbf8fSBarry Smith mat->comm = comm; 9441eb62cbbSBarry Smith aij->insertmode = NotSetValues; 9451eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 9461eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 9471eb62cbbSBarry Smith 9481eb62cbbSBarry Smith if (M == -1 || N == -1) { 9491eb62cbbSBarry Smith work[0] = m; work[1] = n; 950d6dfbf8fSBarry Smith MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 9511eb62cbbSBarry Smith if (M == -1) M = sum[0]; 9521eb62cbbSBarry Smith if (N == -1) N = sum[1]; 9531eb62cbbSBarry Smith } 9541eb62cbbSBarry Smith if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 9551eb62cbbSBarry Smith if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 9568a729477SBarry Smith aij->m = m; 9578a729477SBarry Smith aij->n = n; 9581eb62cbbSBarry Smith aij->N = N; 9591eb62cbbSBarry Smith aij->M = M; 9601eb62cbbSBarry Smith 9611eb62cbbSBarry Smith /* build local table of row and column ownerships */ 9621eb62cbbSBarry Smith aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 9631eb62cbbSBarry Smith CHKPTR(aij->rowners); 9641eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 9651eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 9661eb62cbbSBarry Smith aij->rowners[0] = 0; 9671eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 9681eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 9698a729477SBarry Smith } 9701eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 9711eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 9721eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 9731eb62cbbSBarry Smith aij->cowners[0] = 0; 9741eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 9751eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 9768a729477SBarry Smith } 9771eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 9781eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 9798a729477SBarry Smith 9808a729477SBarry Smith 9811eb62cbbSBarry Smith ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 9821eb62cbbSBarry Smith ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 9838a729477SBarry Smith 9841eb62cbbSBarry Smith /* build cache for off array entries formed */ 9851eb62cbbSBarry Smith aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 9861eb62cbbSBarry Smith aij->stash.n = 0; 9871eb62cbbSBarry Smith aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 9881eb62cbbSBarry Smith sizeof(Scalar))); CHKPTR(aij->stash.array); 9891eb62cbbSBarry Smith aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 9901eb62cbbSBarry Smith aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 9918a729477SBarry Smith 9921eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 9931eb62cbbSBarry Smith aij->lvec = 0; 9941eb62cbbSBarry Smith aij->Mvctx = 0; 995d6dfbf8fSBarry Smith aij->assembled = 0; 9968a729477SBarry Smith 997d6dfbf8fSBarry Smith *newmat = mat; 998d6dfbf8fSBarry Smith return 0; 999d6dfbf8fSBarry Smith } 1000c74985f6SBarry Smith 1001d6dfbf8fSBarry Smith static int MatiCopy(Mat matin,Mat *newmat) 1002d6dfbf8fSBarry Smith { 1003d6dfbf8fSBarry Smith Mat mat; 1004d6dfbf8fSBarry Smith Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data; 1005d6dfbf8fSBarry Smith int i,rl,len, m = oldmat->m, n = oldmat->n,ierr; 1006d6dfbf8fSBarry Smith *newmat = 0; 1007d6dfbf8fSBarry Smith 1008d6dfbf8fSBarry Smith if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 1009d6dfbf8fSBarry Smith CREATEHEADER(mat,_Mat); 1010d6dfbf8fSBarry Smith mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 1011d6dfbf8fSBarry Smith mat->cookie = MAT_COOKIE; 1012d6dfbf8fSBarry Smith mat->ops = &MatOps; 1013d6dfbf8fSBarry Smith mat->destroy = MatiAIJdestroy; 1014d6dfbf8fSBarry Smith mat->view = MatiView; 1015d6dfbf8fSBarry Smith mat->type = MATAIJSEQ; 1016d6dfbf8fSBarry Smith mat->factor = matin->factor; 1017d6dfbf8fSBarry Smith mat->row = 0; 1018d6dfbf8fSBarry Smith mat->col = 0; 1019d6dfbf8fSBarry Smith 1020d6dfbf8fSBarry Smith aij->m = oldmat->m; 1021d6dfbf8fSBarry Smith aij->n = oldmat->n; 1022d6dfbf8fSBarry Smith aij->M = oldmat->M; 1023d6dfbf8fSBarry Smith aij->N = oldmat->N; 1024d6dfbf8fSBarry Smith 1025d6dfbf8fSBarry Smith aij->assembled = 1; 1026d6dfbf8fSBarry Smith aij->rstart = oldmat->rstart; 1027d6dfbf8fSBarry Smith aij->rend = oldmat->rend; 1028d6dfbf8fSBarry Smith aij->cstart = oldmat->cstart; 1029d6dfbf8fSBarry Smith aij->cend = oldmat->cend; 1030d6dfbf8fSBarry Smith aij->numtids = oldmat->numtids; 1031d6dfbf8fSBarry Smith aij->mytid = oldmat->mytid; 1032d6dfbf8fSBarry Smith aij->insertmode = NotSetValues; 1033d6dfbf8fSBarry Smith 1034d6dfbf8fSBarry Smith aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 1035d6dfbf8fSBarry Smith CHKPTR(aij->rowners); 1036d6dfbf8fSBarry Smith MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1037d6dfbf8fSBarry Smith aij->stash.nmax = 0; 1038d6dfbf8fSBarry Smith aij->stash.n = 0; 1039d6dfbf8fSBarry Smith aij->stash.array= 0; 1040d6dfbf8fSBarry Smith mat->comm = matin->comm; 1041d6dfbf8fSBarry Smith 1042d6dfbf8fSBarry Smith ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 1043d6dfbf8fSBarry Smith ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 1044d6dfbf8fSBarry Smith ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 1045d6dfbf8fSBarry Smith ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 10468a729477SBarry Smith *newmat = mat; 10478a729477SBarry Smith return 0; 10488a729477SBarry Smith } 1049