1cb512458SBarry Smith #ifndef lint 2*855ac2c5SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.58 1995/07/12 03:12:25 curfman Exp curfman $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1744a69424SLois Curfman McInnes Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 189e25ed09SBarry Smith int n = B->n,i; 1978b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 2078b31e54SBarry Smith PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int)); 21abc0e9e4SLois Curfman McInnes for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 229e25ed09SBarry Smith return 0; 239e25ed09SBarry Smith } 249e25ed09SBarry Smith 252493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 262493cbb0SBarry Smith 2751c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 28299609e3SLois Curfman McInnes { 29299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 30299609e3SLois Curfman McInnes int ierr; 31299609e3SLois Curfman McInnes if (aij->numtids == 1) { 3251c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 33299609e3SLois Curfman McInnes } else 34299609e3SLois Curfman McInnes SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel."); 35299609e3SLois Curfman McInnes return 0; 36299609e3SLois Curfman McInnes } 37299609e3SLois Curfman McInnes 382ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 391eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 421eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 431eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 448a729477SBarry Smith 4507a0e7adSLois Curfman McInnes if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) { 4678b31e54SBarry Smith SETERRQ(1,"You cannot mix inserts and adds"); 478a729477SBarry Smith } 481eb62cbbSBarry Smith aij->insertmode = addv; 498a729477SBarry Smith for ( i=0; i<m; i++ ) { 5078b31e54SBarry Smith if (idxm[i] < 0) SETERRQ(1,"Negative row index"); 5178b31e54SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large"); 521eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 531eb62cbbSBarry Smith row = idxm[i] - rstart; 541eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 5578b31e54SBarry Smith if (idxn[j] < 0) SETERRQ(1,"Negative column index"); 5678b31e54SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large"); 571eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 581eb62cbbSBarry Smith col = idxn[j] - cstart; 5978b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 601eb62cbbSBarry Smith } 611eb62cbbSBarry Smith else { 62d6dfbf8fSBarry Smith if (aij->assembled) { 63b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 649e25ed09SBarry Smith col = aij->colmap[idxn[j]] - 1; 65b7c46309SBarry Smith if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 662493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 672493cbb0SBarry Smith col = idxn[j]; 68d6dfbf8fSBarry Smith } 699e25ed09SBarry Smith } 709e25ed09SBarry Smith else col = idxn[j]; 7178b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 721eb62cbbSBarry Smith } 731eb62cbbSBarry Smith } 741eb62cbbSBarry Smith } 751eb62cbbSBarry Smith else { 76dbd7a890SLois Curfman McInnes ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); 7778b31e54SBarry Smith CHKERRQ(ierr); 781eb62cbbSBarry Smith } 798a729477SBarry Smith } 808a729477SBarry Smith return 0; 818a729477SBarry Smith } 828a729477SBarry Smith 838a729477SBarry Smith /* 841eb62cbbSBarry Smith the assembly code is a lot like the code for vectors, we should 851eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 861eb62cbbSBarry Smith either case. 878a729477SBarry Smith */ 888a729477SBarry Smith 89fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 908a729477SBarry Smith { 9144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 92d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 936abc6512SBarry Smith int numtids = aij->numtids, *owners = aij->rowners; 941eb62cbbSBarry Smith int mytid = aij->mytid; 951eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 966abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 975392566eSBarry Smith int tag = mat->tag, *owner,*starts,count,ierr; 981eb62cbbSBarry Smith InsertMode addv; 991eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1001eb62cbbSBarry Smith 1011eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 10228988994SBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 1031eb62cbbSBarry Smith MPI_BOR,comm); 1049d00d63dSBarry Smith if (addv == (ADDVALUES|INSERTVALUES)) { 10578b31e54SBarry Smith SETERRQ(1,"Some processors have inserted while others have added"); 1061eb62cbbSBarry Smith } 1071eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1081eb62cbbSBarry Smith 1091eb62cbbSBarry Smith /* first count number of contributors to each processor */ 11078b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 11178b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 11278b31e54SBarry Smith owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1131eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1141eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1151eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1161eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1171eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1188a729477SBarry Smith } 1198a729477SBarry Smith } 1208a729477SBarry Smith } 1211eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1221eb62cbbSBarry Smith 1231eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 12478b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 1251eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1261eb62cbbSBarry Smith nreceives = work[mytid]; 1271eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1281eb62cbbSBarry Smith nmax = work[mytid]; 12978b31e54SBarry Smith PETSCFREE(work); 1301eb62cbbSBarry Smith 1311eb62cbbSBarry Smith /* post receives: 1321eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1331eb62cbbSBarry Smith (global index,value) we store the global index as a double 134d6dfbf8fSBarry Smith to simplify the message passing. 1351eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1361eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1371eb62cbbSBarry Smith this is a lot of wasted space. 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith 1401eb62cbbSBarry Smith This could be done better. 1411eb62cbbSBarry Smith */ 14278b31e54SBarry Smith rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 14378b31e54SBarry Smith CHKPTRQ(rvalues); 14478b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 14578b31e54SBarry Smith CHKPTRQ(recv_waits); 1461eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 147c60448a5SBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1481eb62cbbSBarry Smith comm,recv_waits+i); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith 1511eb62cbbSBarry Smith /* do sends: 1521eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1531eb62cbbSBarry Smith the ith processor 1541eb62cbbSBarry Smith */ 15578b31e54SBarry Smith svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 15678b31e54SBarry Smith CHKPTRQ(svalues); 15778b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 15878b31e54SBarry Smith CHKPTRQ(send_waits); 15978b31e54SBarry Smith starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 1601eb62cbbSBarry Smith starts[0] = 0; 1611eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1621eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1631eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1641eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1651eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1661eb62cbbSBarry Smith } 16778b31e54SBarry Smith PETSCFREE(owner); 1681eb62cbbSBarry Smith starts[0] = 0; 1691eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1701eb62cbbSBarry Smith count = 0; 1711eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1721eb62cbbSBarry Smith if (procs[i]) { 173c60448a5SBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1741eb62cbbSBarry Smith comm,send_waits+count++); 1751eb62cbbSBarry Smith } 1761eb62cbbSBarry Smith } 17778b31e54SBarry Smith PETSCFREE(starts); PETSCFREE(nprocs); 1781eb62cbbSBarry Smith 1791eb62cbbSBarry Smith /* Free cache space */ 18078b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1831eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1841eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1851eb62cbbSBarry Smith aij->rmax = nmax; 1861eb62cbbSBarry Smith 1871eb62cbbSBarry Smith return 0; 1881eb62cbbSBarry Smith } 18944a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 1901eb62cbbSBarry Smith 191fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 1921eb62cbbSBarry Smith { 1931eb62cbbSBarry Smith int ierr; 19444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1951eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 1966abc6512SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 1972493cbb0SBarry Smith int row,col,other_disassembled; 1981eb62cbbSBarry Smith Scalar *values,val; 1991eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2001eb62cbbSBarry Smith 2011eb62cbbSBarry Smith /* wait on receives */ 2021eb62cbbSBarry Smith while (count) { 203d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2041eb62cbbSBarry Smith /* unpack receives into our local space */ 205d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 206c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2071eb62cbbSBarry Smith n = n/3; 2081eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2091eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2101eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2111eb62cbbSBarry Smith val = values[3*i+2]; 2121eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2131eb62cbbSBarry Smith col -= aij->cstart; 2141eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2151eb62cbbSBarry Smith } 2161eb62cbbSBarry Smith else { 217d6dfbf8fSBarry Smith if (aij->assembled) { 218b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 2199e25ed09SBarry Smith col = aij->colmap[col] - 1; 220b7c46309SBarry Smith if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 2212493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2222493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 223d6dfbf8fSBarry Smith } 2249e25ed09SBarry Smith } 2251eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2261eb62cbbSBarry Smith } 2271eb62cbbSBarry Smith } 2281eb62cbbSBarry Smith count--; 2291eb62cbbSBarry Smith } 23078b31e54SBarry Smith PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 2311eb62cbbSBarry Smith 2321eb62cbbSBarry Smith /* wait on sends */ 2331eb62cbbSBarry Smith if (aij->nsends) { 23478b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) ); 23578b31e54SBarry Smith CHKPTRQ(send_status); 2361eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 23778b31e54SBarry Smith PETSCFREE(send_status); 2381eb62cbbSBarry Smith } 23978b31e54SBarry Smith PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 2401eb62cbbSBarry Smith 24107a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 24278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 24378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2441eb62cbbSBarry Smith 2452493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2462493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 2472493cbb0SBarry Smith MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1, 2482493cbb0SBarry Smith MPI_INT,MPI_PROD,mat->comm); 2492493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2502493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2512493cbb0SBarry Smith } 2522493cbb0SBarry Smith 2535e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 25478b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 255d6dfbf8fSBarry Smith aij->assembled = 1; 2565e42470aSBarry Smith } 25778b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 25878b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2595e42470aSBarry Smith 2608a729477SBarry Smith return 0; 2618a729477SBarry Smith } 2628a729477SBarry Smith 2632ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2641eb62cbbSBarry Smith { 26544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 266dbd7a890SLois Curfman McInnes int ierr; 26778b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 26878b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2691eb62cbbSBarry Smith return 0; 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith 2721eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2731eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2741eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2771eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2781eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2791eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2801eb62cbbSBarry Smith routine. 2811eb62cbbSBarry Smith */ 28244a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 2831eb62cbbSBarry Smith { 28444a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 2851eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2866abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 2871eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2885392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 289d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 290d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2911eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2921eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2931eb62cbbSBarry Smith IS istmp; 2941eb62cbbSBarry Smith 295c8f67822SLois Curfman McInnes if (!l->assembled) 29678b31e54SBarry Smith SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first"); 29778b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 29878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2991eb62cbbSBarry Smith 3001eb62cbbSBarry Smith /* first count number of contributors to each processor */ 30178b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 30278b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 30378b31e54SBarry Smith owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3041eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3051eb62cbbSBarry Smith idx = rows[i]; 3061eb62cbbSBarry Smith found = 0; 3071eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 3081eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3091eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3101eb62cbbSBarry Smith } 3111eb62cbbSBarry Smith } 31278b31e54SBarry Smith if (!found) SETERRQ(1,"Index out of range."); 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3151eb62cbbSBarry Smith 3161eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 31778b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 3181eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3191eb62cbbSBarry Smith nrecvs = work[mytid]; 3201eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3211eb62cbbSBarry Smith nmax = work[mytid]; 32278b31e54SBarry Smith PETSCFREE(work); 3231eb62cbbSBarry Smith 3241eb62cbbSBarry Smith /* post receives: */ 32578b31e54SBarry Smith rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 32678b31e54SBarry Smith CHKPTRQ(rvalues); 32778b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 32878b31e54SBarry Smith CHKPTRQ(recv_waits); 3291eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3301eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3311eb62cbbSBarry Smith comm,recv_waits+i); 3321eb62cbbSBarry Smith } 3331eb62cbbSBarry Smith 3341eb62cbbSBarry Smith /* do sends: 3351eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3361eb62cbbSBarry Smith the ith processor 3371eb62cbbSBarry Smith */ 33878b31e54SBarry Smith svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 33978b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 34078b31e54SBarry Smith CHKPTRQ(send_waits); 34178b31e54SBarry Smith starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 3421eb62cbbSBarry Smith starts[0] = 0; 3431eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3441eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3451eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3461eb62cbbSBarry Smith } 3471eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3481eb62cbbSBarry Smith 3491eb62cbbSBarry Smith starts[0] = 0; 3501eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3511eb62cbbSBarry Smith count = 0; 3521eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3531eb62cbbSBarry Smith if (procs[i]) { 3541eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3551eb62cbbSBarry Smith comm,send_waits+count++); 3561eb62cbbSBarry Smith } 3571eb62cbbSBarry Smith } 35878b31e54SBarry Smith PETSCFREE(starts); 3591eb62cbbSBarry Smith 3601eb62cbbSBarry Smith base = owners[mytid]; 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* wait on receives */ 36378b31e54SBarry Smith lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3641eb62cbbSBarry Smith source = lens + nrecvs; 3651eb62cbbSBarry Smith count = nrecvs; slen = 0; 3661eb62cbbSBarry Smith while (count) { 367d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3681eb62cbbSBarry Smith /* unpack receives into our local space */ 3691eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 370d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 371d6dfbf8fSBarry Smith lens[imdex] = n; 3721eb62cbbSBarry Smith slen += n; 3731eb62cbbSBarry Smith count--; 3741eb62cbbSBarry Smith } 37578b31e54SBarry Smith PETSCFREE(recv_waits); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith /* move the data into the send scatter */ 37878b31e54SBarry Smith lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3791eb62cbbSBarry Smith count = 0; 3801eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3811eb62cbbSBarry Smith values = rvalues + i*nmax; 3821eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3831eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3841eb62cbbSBarry Smith } 3851eb62cbbSBarry Smith } 38678b31e54SBarry Smith PETSCFREE(rvalues); PETSCFREE(lens); 38778b31e54SBarry Smith PETSCFREE(owner); PETSCFREE(nprocs); 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* actually zap the local rows */ 3906b5873e3SBarry Smith ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp); 39178b31e54SBarry Smith CHKERRQ(ierr); PETSCFREE(lrows); 39278b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 39378b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 39478b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* wait on sends */ 3971eb62cbbSBarry Smith if (nsends) { 39878b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 39978b31e54SBarry Smith CHKPTRQ(send_status); 4001eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 40178b31e54SBarry Smith PETSCFREE(send_status); 4021eb62cbbSBarry Smith } 40378b31e54SBarry Smith PETSCFREE(send_waits); PETSCFREE(svalues); 4041eb62cbbSBarry Smith 4051eb62cbbSBarry Smith 4061eb62cbbSBarry Smith return 0; 4071eb62cbbSBarry Smith } 4081eb62cbbSBarry Smith 40944a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 4101eb62cbbSBarry Smith { 41144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 4121eb62cbbSBarry Smith int ierr; 41378b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first"); 41406be10caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 41578b31e54SBarry Smith CHKERRQ(ierr); 41678b31e54SBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 41706be10caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 41878b31e54SBarry Smith CHKERRQ(ierr); 41978b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 4201eb62cbbSBarry Smith return 0; 4211eb62cbbSBarry Smith } 4221eb62cbbSBarry Smith 42344a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 424da3a660dSBarry Smith { 42544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 426da3a660dSBarry Smith int ierr; 42778b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first"); 42806be10caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 42978b31e54SBarry Smith CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 43106be10caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 43278b31e54SBarry Smith CHKERRQ(ierr); 43378b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 434da3a660dSBarry Smith return 0; 435da3a660dSBarry Smith } 436da3a660dSBarry Smith 43744a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 438da3a660dSBarry Smith { 43944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 440da3a660dSBarry Smith int ierr; 441da3a660dSBarry Smith 44244a69424SLois Curfman McInnes if (!aij->assembled) 44378b31e54SBarry Smith SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first"); 444da3a660dSBarry Smith /* do nondiagonal part */ 44578b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 446da3a660dSBarry Smith /* send it on its way */ 44706be10caSBarry Smith ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES, 44878b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 449da3a660dSBarry Smith /* do local part */ 45078b31e54SBarry Smith ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 451da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 452da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 453da3a660dSBarry Smith /* but is not perhaps always true. */ 45406be10caSBarry Smith ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES, 45578b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 456da3a660dSBarry Smith return 0; 457da3a660dSBarry Smith } 458da3a660dSBarry Smith 45944a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 460da3a660dSBarry Smith { 46144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 462da3a660dSBarry Smith int ierr; 463da3a660dSBarry Smith 46444a69424SLois Curfman McInnes if (!aij->assembled) 46578b31e54SBarry Smith SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first"); 466da3a660dSBarry Smith /* do nondiagonal part */ 46778b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 468da3a660dSBarry Smith /* send it on its way */ 46906be10caSBarry Smith ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES, 47078b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 471da3a660dSBarry Smith /* do local part */ 47278b31e54SBarry Smith ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 473da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 474da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 475da3a660dSBarry Smith /* but is not perhaps always true. */ 47606be10caSBarry Smith ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES, 47778b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 478da3a660dSBarry Smith return 0; 479da3a660dSBarry Smith } 480da3a660dSBarry Smith 4811eb62cbbSBarry Smith /* 4821eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4831eb62cbbSBarry Smith diagonal block 4841eb62cbbSBarry Smith */ 485d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 4861eb62cbbSBarry Smith { 48744a69424SLois Curfman McInnes Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 48878b31e54SBarry Smith if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first"); 4891eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4901eb62cbbSBarry Smith } 4911eb62cbbSBarry Smith 49244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 4931eb62cbbSBarry Smith { 4941eb62cbbSBarry Smith Mat mat = (Mat) obj; 49544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4961eb62cbbSBarry Smith int ierr; 497a5a9c739SBarry Smith #if defined(PETSC_LOG) 498a5a9c739SBarry Smith PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 499a5a9c739SBarry Smith #endif 50078b31e54SBarry Smith PETSCFREE(aij->rowners); 50178b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 50278b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 50378b31e54SBarry Smith if (aij->colmap) PETSCFREE(aij->colmap); 50478b31e54SBarry Smith if (aij->garray) PETSCFREE(aij->garray); 5051eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 5061eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 50778b31e54SBarry Smith PETSCFREE(aij); 508a5a9c739SBarry Smith PLogObjectDestroy(mat); 509a5a9c739SBarry Smith PETSCHEADERDESTROY(mat); 5101eb62cbbSBarry Smith return 0; 5111eb62cbbSBarry Smith } 5127857610eSBarry Smith #include "draw.h" 513ee50ffe9SBarry Smith #include "pviewer.h" 514ee50ffe9SBarry Smith 51544a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 5161eb62cbbSBarry Smith { 5171eb62cbbSBarry Smith Mat mat = (Mat) obj; 51844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5191eb62cbbSBarry Smith int ierr; 520d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 5211eb62cbbSBarry Smith 52278b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first"); 523154123eaSLois Curfman McInnes if (!viewer) { /* so that viewers may be used from debuggers */ 524154123eaSLois Curfman McInnes viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer; 525154123eaSLois Curfman McInnes } 526ab254492SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 5277857610eSBarry Smith if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) { 5284a0ce102SLois Curfman McInnes FILE *fd = ViewerFileGetPointer_Private(viewer); 5297f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 530d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 5311eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5321eb62cbbSBarry Smith aij->cend); 53378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 53478b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 535d13ab20cSBarry Smith fflush(fd); 5367f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 537d13ab20cSBarry Smith } 5387857610eSBarry Smith else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) || 5397857610eSBarry Smith vobj->cookie == DRAW_COOKIE) { 54095373324SBarry Smith int numtids = aij->numtids, mytid = aij->mytid; 54195373324SBarry Smith if (numtids == 1) { 54278b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 54395373324SBarry Smith } 54495373324SBarry Smith else { 54595373324SBarry Smith /* assemble the entire matrix onto first processor. */ 54695373324SBarry Smith Mat A; 5472ee70a88SLois Curfman McInnes Mat_AIJ *Aloc; 5482eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 54995373324SBarry Smith Scalar *a; 5502ee70a88SLois Curfman McInnes 55195373324SBarry Smith if (!mytid) { 55295373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 55395373324SBarry Smith } 55495373324SBarry Smith else { 55595373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 55695373324SBarry Smith } 55778b31e54SBarry Smith CHKERRQ(ierr); 55895373324SBarry Smith 55995373324SBarry Smith /* copy over the A part */ 5602ee70a88SLois Curfman McInnes Aloc = (Mat_AIJ*) aij->A->data; 5612ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 56295373324SBarry Smith row = aij->rstart; 563b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;} 56495373324SBarry Smith for ( i=0; i<m; i++ ) { 56507a0e7adSLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES); 56678b31e54SBarry Smith CHKERRQ(ierr); 56795373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 56895373324SBarry Smith } 5692ee70a88SLois Curfman McInnes aj = Aloc->j; 570b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;} 57195373324SBarry Smith 57295373324SBarry Smith /* copy over the B part */ 5732ee70a88SLois Curfman McInnes Aloc = (Mat_AIJ*) aij->B->data; 5742ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 57595373324SBarry Smith row = aij->rstart; 57678b31e54SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 577b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];} 57895373324SBarry Smith for ( i=0; i<m; i++ ) { 57907a0e7adSLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES); 58078b31e54SBarry Smith CHKERRQ(ierr); 58195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 58295373324SBarry Smith } 58378b31e54SBarry Smith PETSCFREE(ct); 58478b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58578b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58695373324SBarry Smith if (!mytid) { 58778b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 58895373324SBarry Smith } 58978b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 59095373324SBarry Smith } 59195373324SBarry Smith } 5921eb62cbbSBarry Smith return 0; 5931eb62cbbSBarry Smith } 5941eb62cbbSBarry Smith 595d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ *); 5961eb62cbbSBarry Smith /* 5971eb62cbbSBarry Smith This has to provide several versions. 5981eb62cbbSBarry Smith 5991eb62cbbSBarry Smith 1) per sequential 6001eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6011eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 602d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6031eb62cbbSBarry Smith */ 604fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 605fc76ce87SLois Curfman McInnes double shift,int its,Vec xx) 6068a729477SBarry Smith { 60744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 608d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 60944a69424SLois Curfman McInnes Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data; 6106abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6116abc6512SBarry Smith int ierr,*idx, *diag; 6126abc6512SBarry Smith int n = mat->n, m = mat->m, i; 613da3a660dSBarry Smith Vec tt; 6148a729477SBarry Smith 61578b31e54SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first"); 6161eb62cbbSBarry Smith 617d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 618d6dfbf8fSBarry Smith xs = x -1; /* shift by one for index start of 1 */ 619da3a660dSBarry Smith ls--; 620d3c9fed9SLois Curfman McInnes if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;} 621d6dfbf8fSBarry Smith diag = A->diag; 622acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 62378b31e54SBarry Smith SETERRQ(1,"That option not yet support for parallel AIJ matrices"); 624acb40c82SBarry Smith } 625da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 626da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 627da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 628da3a660dSBarry Smith 629da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 630da3a660dSBarry Smith 631da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 632da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 633da3a660dSBarry Smith is the relaxation factor. 634da3a660dSBarry Smith */ 63578b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 636da3a660dSBarry Smith VecGetArray(tt,&t); 637da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 638da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 639da3a660dSBarry Smith VecSet(&zero,mat->lvec); 64006be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 64178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 642da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 643da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 644da3a660dSBarry Smith idx = A->j + diag[i]; 645da3a660dSBarry Smith v = A->a + diag[i]; 646da3a660dSBarry Smith sum = b[i]; 647da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 648da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 649da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 650da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 651da3a660dSBarry Smith v = B->a + B->i[i] - 1; 652da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 653da3a660dSBarry Smith x[i] = omega*(sum/d); 654da3a660dSBarry Smith } 655edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 65678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 657da3a660dSBarry Smith 658da3a660dSBarry Smith /* t = b - (2*E - D)x */ 659da3a660dSBarry Smith v = A->a; 660da3a660dSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 661da3a660dSBarry Smith 662da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 663da3a660dSBarry Smith ts = t - 1; /* shifted by one for index start of a or mat->j*/ 664da3a660dSBarry Smith diag = A->diag; 665da3a660dSBarry Smith VecSet(&zero,mat->lvec); 66606be10caSBarry Smith ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 66778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 668da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 669da3a660dSBarry Smith n = diag[i] - A->i[i]; 670da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 671da3a660dSBarry Smith v = A->a + A->i[i] - 1; 672da3a660dSBarry Smith sum = t[i]; 673da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 674da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 675da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 676da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 677da3a660dSBarry Smith v = B->a + B->i[i] - 1; 678da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 679da3a660dSBarry Smith t[i] = omega*(sum/d); 680da3a660dSBarry Smith } 681edd2f0e1SBarry Smith ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 68278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 683da3a660dSBarry Smith /* x = x + t */ 684da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 685da3a660dSBarry Smith VecDestroy(tt); 686da3a660dSBarry Smith return 0; 687da3a660dSBarry Smith } 688da3a660dSBarry Smith 6891eb62cbbSBarry Smith 690d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 691da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 692da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 693da3a660dSBarry Smith } 694da3a660dSBarry Smith else { 69506be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 69678b31e54SBarry Smith CHKERRQ(ierr); 69706be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 69878b31e54SBarry Smith CHKERRQ(ierr); 699da3a660dSBarry Smith } 700d6dfbf8fSBarry Smith while (its--) { 701d6dfbf8fSBarry Smith /* go down through the rows */ 70206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 70378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 704d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 705d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 706d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 707d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 708d6dfbf8fSBarry Smith sum = b[i]; 709d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 710d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 711d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 712d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 713d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 714d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 715d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 716d6dfbf8fSBarry Smith } 717edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 71878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 719d6dfbf8fSBarry Smith /* come up through the rows */ 72006be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 72178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 722d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 723d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 724d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 725d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 726d6dfbf8fSBarry Smith sum = b[i]; 727d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 728d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 729d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 730d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 731d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 732d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 733d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 734d6dfbf8fSBarry Smith } 735edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 73678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 737d6dfbf8fSBarry Smith } 738d6dfbf8fSBarry Smith } 739d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 740da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 741da3a660dSBarry Smith VecSet(&zero,mat->lvec); 74206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 74378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 744da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 745da3a660dSBarry Smith n = diag[i] - A->i[i]; 746da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 747da3a660dSBarry Smith v = A->a + A->i[i] - 1; 748da3a660dSBarry Smith sum = b[i]; 749da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 750da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 751da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 752da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 753da3a660dSBarry Smith v = B->a + B->i[i] - 1; 754da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 755da3a660dSBarry Smith x[i] = omega*(sum/d); 756da3a660dSBarry Smith } 757edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 75878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 759da3a660dSBarry Smith its--; 760da3a660dSBarry Smith } 761d6dfbf8fSBarry Smith while (its--) { 76206be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 76378b31e54SBarry Smith CHKERRQ(ierr); 76406be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 76578b31e54SBarry Smith CHKERRQ(ierr); 76606be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 769d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 770d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 771d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 772d6dfbf8fSBarry Smith sum = b[i]; 773d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 774d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 775d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 776d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 777d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 778d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 779d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 780d6dfbf8fSBarry Smith } 781edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 78278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 783d6dfbf8fSBarry Smith } 784d6dfbf8fSBarry Smith } 785d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 786da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 787da3a660dSBarry Smith VecSet(&zero,mat->lvec); 78806be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 78978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 790da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 791da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 792da3a660dSBarry Smith idx = A->j + diag[i]; 793da3a660dSBarry Smith v = A->a + diag[i]; 794da3a660dSBarry Smith sum = b[i]; 795da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 796da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 797da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 798da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 799da3a660dSBarry Smith v = B->a + B->i[i] - 1; 800da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 801da3a660dSBarry Smith x[i] = omega*(sum/d); 802da3a660dSBarry Smith } 803edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 80478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 805da3a660dSBarry Smith its--; 806da3a660dSBarry Smith } 807d6dfbf8fSBarry Smith while (its--) { 80806be10caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 80978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 81006be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 81178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 81206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 81378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 814d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 815d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 816d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 817d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 818d6dfbf8fSBarry Smith sum = b[i]; 819d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 820d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 821d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 822d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 823d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 824d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 825d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 826d6dfbf8fSBarry Smith } 827edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829d6dfbf8fSBarry Smith } 830d6dfbf8fSBarry Smith } 831d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 832da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 833da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 834da3a660dSBarry Smith } 83506be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 83678b31e54SBarry Smith CHKERRQ(ierr); 83706be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 83878b31e54SBarry Smith CHKERRQ(ierr); 839d6dfbf8fSBarry Smith while (its--) { 840d6dfbf8fSBarry Smith /* go down through the rows */ 841d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 842d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 843d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 844d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 845d6dfbf8fSBarry Smith sum = b[i]; 846d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 847d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 848d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 849d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 850d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 851d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 852d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 853d6dfbf8fSBarry Smith } 854d6dfbf8fSBarry Smith /* come up through the rows */ 855d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 856d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 857d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 858d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 859d6dfbf8fSBarry Smith sum = b[i]; 860d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 861d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 862d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 863d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 864d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 865d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 866d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 867d6dfbf8fSBarry Smith } 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith } 870d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 871da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 872da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 873da3a660dSBarry Smith } 87406be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 87578b31e54SBarry Smith CHKERRQ(ierr); 87606be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 87778b31e54SBarry Smith CHKERRQ(ierr); 878d6dfbf8fSBarry Smith while (its--) { 879d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 880d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 881d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 882d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 883d6dfbf8fSBarry Smith sum = b[i]; 884d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 885d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 886d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 887d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 888d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 889d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 890d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 891d6dfbf8fSBarry Smith } 892d6dfbf8fSBarry Smith } 893d6dfbf8fSBarry Smith } 894d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 895da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 896da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 897da3a660dSBarry Smith } 89806be10caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL, 89978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 90006be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL, 90178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 902d6dfbf8fSBarry Smith while (its--) { 903d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 904d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 905d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 906d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 907d6dfbf8fSBarry Smith sum = b[i]; 908d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 909d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 910d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 911d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 912d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 913d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 914d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 915d6dfbf8fSBarry Smith } 916d6dfbf8fSBarry Smith } 917d6dfbf8fSBarry Smith } 9188a729477SBarry Smith return 0; 9198a729477SBarry Smith } 920a66be287SLois Curfman McInnes 921fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 922fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 923a66be287SLois Curfman McInnes { 924a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 925a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 926a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 927a66be287SLois Curfman McInnes 92878b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 92978b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 930a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 931a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 932a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 933a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 934a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm); 935a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 936a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 937a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm); 938a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 939a66be287SLois Curfman McInnes } 940a66be287SLois Curfman McInnes return 0; 941a66be287SLois Curfman McInnes } 942a66be287SLois Curfman McInnes 943299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 944299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 945299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 946299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 947299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 948299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 949299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 950299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 951299609e3SLois Curfman McInnes 952fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op) 953c74985f6SBarry Smith { 95444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 955c74985f6SBarry Smith 956c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 957c74985f6SBarry Smith MatSetOption(aij->A,op); 958c74985f6SBarry Smith MatSetOption(aij->B,op); 959c74985f6SBarry Smith } 960c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 961c74985f6SBarry Smith MatSetOption(aij->A,op); 962c74985f6SBarry Smith MatSetOption(aij->B,op); 963c74985f6SBarry Smith } 96478b31e54SBarry Smith else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported"); 965c74985f6SBarry Smith return 0; 966c74985f6SBarry Smith } 967c74985f6SBarry Smith 968d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 969c74985f6SBarry Smith { 97044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 971c74985f6SBarry Smith *m = mat->M; *n = mat->N; 972c74985f6SBarry Smith return 0; 973c74985f6SBarry Smith } 974c74985f6SBarry Smith 975d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 976c74985f6SBarry Smith { 97744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 978b7c46309SBarry Smith *m = mat->m; *n = mat->N; 979c74985f6SBarry Smith return 0; 980c74985f6SBarry Smith } 981c74985f6SBarry Smith 982d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 983c74985f6SBarry Smith { 98444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 985c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 986c74985f6SBarry Smith return 0; 987c74985f6SBarry Smith } 988c74985f6SBarry Smith 98939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 99039e00950SLois Curfman McInnes { 991154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 992154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 993154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 994154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 99539e00950SLois Curfman McInnes 99639e00950SLois Curfman McInnes if (row < rstart || row >= rend) 99778b31e54SBarry Smith SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.") 998abc0e9e4SLois Curfman McInnes lrow = row - rstart; 99939e00950SLois Curfman McInnes 1000154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1001154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1002154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 100378b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 100478b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1005154123eaSLois Curfman McInnes nztot = nzA + nzB; 1006154123eaSLois Curfman McInnes 1007154123eaSLois Curfman McInnes if (v || idx) { 1008154123eaSLois Curfman McInnes if (nztot) { 1009154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1010299609e3SLois Curfman McInnes int imark; 101148b35521SBarry Smith if (mat->assembled) { 1012154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 101348b35521SBarry Smith } 1014154123eaSLois Curfman McInnes if (v) { 101578b31e54SBarry Smith *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 101639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1017154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1018154123eaSLois Curfman McInnes else break; 1019154123eaSLois Curfman McInnes } 1020154123eaSLois Curfman McInnes imark = i; 1021154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1022299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1023154123eaSLois Curfman McInnes } 1024154123eaSLois Curfman McInnes if (idx) { 102578b31e54SBarry Smith *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1026154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1027154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1028154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1029154123eaSLois Curfman McInnes else break; 1030154123eaSLois Curfman McInnes } 1031154123eaSLois Curfman McInnes imark = i; 1032154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1033299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 103439e00950SLois Curfman McInnes } 103539e00950SLois Curfman McInnes } 103639e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1037154123eaSLois Curfman McInnes } 103839e00950SLois Curfman McInnes *nz = nztot; 103978b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 104078b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 104139e00950SLois Curfman McInnes return 0; 104239e00950SLois Curfman McInnes } 104339e00950SLois Curfman McInnes 104439e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 104539e00950SLois Curfman McInnes { 104678b31e54SBarry Smith if (idx) PETSCFREE(*idx); 104778b31e54SBarry Smith if (v) PETSCFREE(*v); 104839e00950SLois Curfman McInnes return 0; 104939e00950SLois Curfman McInnes } 105039e00950SLois Curfman McInnes 1051*855ac2c5SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1052*855ac2c5SLois Curfman McInnes { 1053*855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1054*855ac2c5SLois Curfman McInnes int ierr; 1055*855ac2c5SLois Curfman McInnes if (aij->numtids == 1) { 1056*855ac2c5SLois Curfman McInnes ierr = MatNorm_MPIAIJ(aij->A,type,norm); CHKERRQ(ierr); 1057*855ac2c5SLois Curfman McInnes } else 1058*855ac2c5SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ: not yet supported in parallel."); 1059*855ac2c5SLois Curfman McInnes return 0; 1060*855ac2c5SLois Curfman McInnes } 1061*855ac2c5SLois Curfman McInnes 1062b7c46309SBarry Smith static int MatTranspose_MPIAIJ(Mat A,Mat *Bin) 1063b7c46309SBarry Smith { 1064b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1065b7c46309SBarry Smith int ierr; 1066b7c46309SBarry Smith Mat B; 1067b7c46309SBarry Smith Mat_AIJ *Aloc; 1068b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1069b7c46309SBarry Smith Scalar *array; 1070b7c46309SBarry Smith 1071b7c46309SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1072b7c46309SBarry Smith CHKERRQ(ierr); 1073b7c46309SBarry Smith 1074b7c46309SBarry Smith /* copy over the A part */ 1075b7c46309SBarry Smith Aloc = (Mat_AIJ*) a->A->data; 1076b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1077b7c46309SBarry Smith row = a->rstart; 1078b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;} 1079b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1080b7c46309SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES); 1081b7c46309SBarry Smith CHKERRQ(ierr); 1082b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1083b7c46309SBarry Smith } 1084b7c46309SBarry Smith aj = Aloc->j; 1085b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;} 1086b7c46309SBarry Smith 1087b7c46309SBarry Smith /* copy over the B part */ 1088b7c46309SBarry Smith Aloc = (Mat_AIJ*) a->B->data; 1089b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1090b7c46309SBarry Smith row = a->rstart; 1091b7c46309SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1092b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];} 1093b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1094b7c46309SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES); 1095b7c46309SBarry Smith CHKERRQ(ierr); 1096b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1097b7c46309SBarry Smith } 1098b7c46309SBarry Smith PETSCFREE(ct); 1099b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1100b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1101b7c46309SBarry Smith *Bin = B; 1102b7c46309SBarry Smith return 0; 1103b7c46309SBarry Smith } 1104b7c46309SBarry Smith 1105fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1106ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1107d6dfbf8fSBarry Smith 11088a729477SBarry Smith /* -------------------------------------------------------------------*/ 11092ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 111039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 111144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 111244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1113299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1114299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1115299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 111644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1117b7c46309SBarry Smith MatTranspose_MPIAIJ, 1118a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1119*855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1120ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11211eb62cbbSBarry Smith 0, 1122299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1123299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1124299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1125d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1126299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1127ff7509f2SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 11288a729477SBarry Smith 11298a729477SBarry Smith /*@ 1130ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1131ff756334SLois Curfman McInnes (the default parallel PETSc format). 11328a729477SBarry Smith 11338a729477SBarry Smith Input Parameters: 11341eb62cbbSBarry Smith . comm - MPI communicator 11357d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 11367d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 11377d3e4905SLois Curfman McInnes if N is given) 11387d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 11397d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 11407d3e4905SLois Curfman McInnes if n is given) 1141ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1142ff756334SLois Curfman McInnes (same for all local rows) 1143ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1144ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1145ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1146ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1147ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1148ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1149ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 11508a729477SBarry Smith 1151ff756334SLois Curfman McInnes Output Parameter: 11528a729477SBarry Smith . newmat - the matrix 11538a729477SBarry Smith 1154ff756334SLois Curfman McInnes Notes: 1155ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1156ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 1157ff756334SLois Curfman McInnes storage. That is, the stored row and column indices begin at 1158ab693e5aSLois Curfman McInnes one, not zero. See the users manual for further details. 1159ff756334SLois Curfman McInnes 1160ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1161ff756334SLois Curfman McInnes (possibly both). 1162ff756334SLois Curfman McInnes 1163e0245417SLois Curfman McInnes Storage Information: 1164e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1165e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1166e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1167e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1168e0245417SLois Curfman McInnes 1169e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1170e0245417SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1171e0245417SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1172e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1173e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1174ff756334SLois Curfman McInnes 1175dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1176ff756334SLois Curfman McInnes 1177dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues() 11788a729477SBarry Smith @*/ 11791eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 11801eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 11818a729477SBarry Smith { 11828a729477SBarry Smith Mat mat; 118344a69424SLois Curfman McInnes Mat_MPIAIJ *aij; 11846abc6512SBarry Smith int ierr, i,sum[2],work[2]; 11858a729477SBarry Smith *newmat = 0; 118644a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1187a5a9c739SBarry Smith PLogObjectCreate(mat); 118878b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 11898a729477SBarry Smith mat->ops = &MatOps; 119044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 119144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 11928a729477SBarry Smith mat->factor = 0; 1193d6dfbf8fSBarry Smith 119407a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 11951eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 11961eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 11971eb62cbbSBarry Smith 1198dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 11991eb62cbbSBarry Smith work[0] = m; work[1] = n; 1200d6dfbf8fSBarry Smith MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1201dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1202dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 12031eb62cbbSBarry Smith } 1204dbd7a890SLois Curfman McInnes if (m == PETSC_DECIDE) 1205dbd7a890SLois Curfman McInnes {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1206dbd7a890SLois Curfman McInnes if (n == PETSC_DECIDE) 1207dbd7a890SLois Curfman McInnes {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 12088a729477SBarry Smith aij->m = m; 12098a729477SBarry Smith aij->n = n; 12101eb62cbbSBarry Smith aij->N = N; 12111eb62cbbSBarry Smith aij->M = M; 12121eb62cbbSBarry Smith 12131eb62cbbSBarry Smith /* build local table of row and column ownerships */ 121478b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 121578b31e54SBarry Smith CHKPTRQ(aij->rowners); 12161eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 12171eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 12181eb62cbbSBarry Smith aij->rowners[0] = 0; 12191eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 12201eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 12218a729477SBarry Smith } 12221eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 12231eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 12241eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 12251eb62cbbSBarry Smith aij->cowners[0] = 0; 12261eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 12271eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 12288a729477SBarry Smith } 12291eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 12301eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 12318a729477SBarry Smith 12328a729477SBarry Smith 12336b5873e3SBarry Smith ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 123478b31e54SBarry Smith CHKERRQ(ierr); 1235a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 12366b5873e3SBarry Smith ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 123778b31e54SBarry Smith CHKERRQ(ierr); 1238a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 12398a729477SBarry Smith 12401eb62cbbSBarry Smith /* build cache for off array entries formed */ 124178b31e54SBarry Smith ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 12429e25ed09SBarry Smith aij->colmap = 0; 12439e25ed09SBarry Smith aij->garray = 0; 12448a729477SBarry Smith 12451eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 12461eb62cbbSBarry Smith aij->lvec = 0; 12471eb62cbbSBarry Smith aij->Mvctx = 0; 1248d6dfbf8fSBarry Smith aij->assembled = 0; 12498a729477SBarry Smith 1250d6dfbf8fSBarry Smith *newmat = mat; 1251d6dfbf8fSBarry Smith return 0; 1252d6dfbf8fSBarry Smith } 1253c74985f6SBarry Smith 1254ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1255d6dfbf8fSBarry Smith { 1256d6dfbf8fSBarry Smith Mat mat; 125744a69424SLois Curfman McInnes Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 12582ee70a88SLois Curfman McInnes int ierr, len; 1259d6dfbf8fSBarry Smith *newmat = 0; 1260d6dfbf8fSBarry Smith 126178b31e54SBarry Smith if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix"); 126244a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1263a5a9c739SBarry Smith PLogObjectCreate(mat); 126478b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1265d6dfbf8fSBarry Smith mat->ops = &MatOps; 126644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 126744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1268d6dfbf8fSBarry Smith mat->factor = matin->factor; 1269d6dfbf8fSBarry Smith 1270d6dfbf8fSBarry Smith aij->m = oldmat->m; 1271d6dfbf8fSBarry Smith aij->n = oldmat->n; 1272d6dfbf8fSBarry Smith aij->M = oldmat->M; 1273d6dfbf8fSBarry Smith aij->N = oldmat->N; 1274d6dfbf8fSBarry Smith 1275d6dfbf8fSBarry Smith aij->assembled = 1; 1276d6dfbf8fSBarry Smith aij->rstart = oldmat->rstart; 1277d6dfbf8fSBarry Smith aij->rend = oldmat->rend; 1278d6dfbf8fSBarry Smith aij->cstart = oldmat->cstart; 1279d6dfbf8fSBarry Smith aij->cend = oldmat->cend; 1280d6dfbf8fSBarry Smith aij->numtids = oldmat->numtids; 1281d6dfbf8fSBarry Smith aij->mytid = oldmat->mytid; 128207a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 1283d6dfbf8fSBarry Smith 128478b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 128578b31e54SBarry Smith CHKPTRQ(aij->rowners); 128678b31e54SBarry Smith PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 128778b31e54SBarry Smith ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 12882ee70a88SLois Curfman McInnes if (oldmat->colmap) { 128978b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 129078b31e54SBarry Smith CHKPTRQ(aij->colmap); 129178b31e54SBarry Smith PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 12922ee70a88SLois Curfman McInnes } else aij->colmap = 0; 12932ee70a88SLois Curfman McInnes if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) { 129478b31e54SBarry Smith aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 129578b31e54SBarry Smith PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 12962ee70a88SLois Curfman McInnes } else aij->garray = 0; 1297d6dfbf8fSBarry Smith 129878b31e54SBarry Smith ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1299a5a9c739SBarry Smith PLogObjectParent(mat,aij->lvec); 130078b31e54SBarry Smith ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1301a5a9c739SBarry Smith PLogObjectParent(mat,aij->Mvctx); 130278b31e54SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1303a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 130478b31e54SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1305a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 13068a729477SBarry Smith *newmat = mat; 13078a729477SBarry Smith return 0; 13088a729477SBarry Smith } 1309