1cb512458SBarry Smith #ifndef lint 2*51c98ccdSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.57 1995/07/12 03:04:20 curfman Exp curfman $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1744a69424SLois Curfman McInnes Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 189e25ed09SBarry Smith int n = B->n,i; 1978b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 2078b31e54SBarry Smith PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int)); 21abc0e9e4SLois Curfman McInnes for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 229e25ed09SBarry Smith return 0; 239e25ed09SBarry Smith } 249e25ed09SBarry Smith 252493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 262493cbb0SBarry Smith 27*51c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 28299609e3SLois Curfman McInnes { 29299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 30299609e3SLois Curfman McInnes int ierr; 31299609e3SLois Curfman McInnes if (aij->numtids == 1) { 32*51c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 33299609e3SLois Curfman McInnes } else 34299609e3SLois Curfman McInnes SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel."); 35299609e3SLois Curfman McInnes return 0; 36299609e3SLois Curfman McInnes } 37299609e3SLois Curfman McInnes 382ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 391eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 421eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 431eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 448a729477SBarry Smith 4507a0e7adSLois Curfman McInnes if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) { 4678b31e54SBarry Smith SETERRQ(1,"You cannot mix inserts and adds"); 478a729477SBarry Smith } 481eb62cbbSBarry Smith aij->insertmode = addv; 498a729477SBarry Smith for ( i=0; i<m; i++ ) { 5078b31e54SBarry Smith if (idxm[i] < 0) SETERRQ(1,"Negative row index"); 5178b31e54SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large"); 521eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 531eb62cbbSBarry Smith row = idxm[i] - rstart; 541eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 5578b31e54SBarry Smith if (idxn[j] < 0) SETERRQ(1,"Negative column index"); 5678b31e54SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large"); 571eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 581eb62cbbSBarry Smith col = idxn[j] - cstart; 5978b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 601eb62cbbSBarry Smith } 611eb62cbbSBarry Smith else { 62d6dfbf8fSBarry Smith if (aij->assembled) { 63b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 649e25ed09SBarry Smith col = aij->colmap[idxn[j]] - 1; 65b7c46309SBarry Smith if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 662493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 672493cbb0SBarry Smith col = idxn[j]; 68d6dfbf8fSBarry Smith } 699e25ed09SBarry Smith } 709e25ed09SBarry Smith else col = idxn[j]; 7178b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 721eb62cbbSBarry Smith } 731eb62cbbSBarry Smith } 741eb62cbbSBarry Smith } 751eb62cbbSBarry Smith else { 76dbd7a890SLois Curfman McInnes ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); 7778b31e54SBarry Smith CHKERRQ(ierr); 781eb62cbbSBarry Smith } 798a729477SBarry Smith } 808a729477SBarry Smith return 0; 818a729477SBarry Smith } 828a729477SBarry Smith 838a729477SBarry Smith /* 841eb62cbbSBarry Smith the assembly code is a lot like the code for vectors, we should 851eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 861eb62cbbSBarry Smith either case. 878a729477SBarry Smith */ 888a729477SBarry Smith 89fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 908a729477SBarry Smith { 9144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 92d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 936abc6512SBarry Smith int numtids = aij->numtids, *owners = aij->rowners; 941eb62cbbSBarry Smith int mytid = aij->mytid; 951eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 966abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 975392566eSBarry Smith int tag = mat->tag, *owner,*starts,count,ierr; 981eb62cbbSBarry Smith InsertMode addv; 991eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1001eb62cbbSBarry Smith 1011eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 10228988994SBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 1031eb62cbbSBarry Smith MPI_BOR,comm); 1049d00d63dSBarry Smith if (addv == (ADDVALUES|INSERTVALUES)) { 10578b31e54SBarry Smith SETERRQ(1,"Some processors have inserted while others have added"); 1061eb62cbbSBarry Smith } 1071eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1081eb62cbbSBarry Smith 1091eb62cbbSBarry Smith /* first count number of contributors to each processor */ 11078b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 11178b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 11278b31e54SBarry Smith owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1131eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1141eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1151eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1161eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1171eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1188a729477SBarry Smith } 1198a729477SBarry Smith } 1208a729477SBarry Smith } 1211eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1221eb62cbbSBarry Smith 1231eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 12478b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 1251eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1261eb62cbbSBarry Smith nreceives = work[mytid]; 1271eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1281eb62cbbSBarry Smith nmax = work[mytid]; 12978b31e54SBarry Smith PETSCFREE(work); 1301eb62cbbSBarry Smith 1311eb62cbbSBarry Smith /* post receives: 1321eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1331eb62cbbSBarry Smith (global index,value) we store the global index as a double 134d6dfbf8fSBarry Smith to simplify the message passing. 1351eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1361eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1371eb62cbbSBarry Smith this is a lot of wasted space. 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith 1401eb62cbbSBarry Smith This could be done better. 1411eb62cbbSBarry Smith */ 14278b31e54SBarry Smith rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 14378b31e54SBarry Smith CHKPTRQ(rvalues); 14478b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 14578b31e54SBarry Smith CHKPTRQ(recv_waits); 1461eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 147c60448a5SBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1481eb62cbbSBarry Smith comm,recv_waits+i); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith 1511eb62cbbSBarry Smith /* do sends: 1521eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1531eb62cbbSBarry Smith the ith processor 1541eb62cbbSBarry Smith */ 15578b31e54SBarry Smith svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 15678b31e54SBarry Smith CHKPTRQ(svalues); 15778b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 15878b31e54SBarry Smith CHKPTRQ(send_waits); 15978b31e54SBarry Smith starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 1601eb62cbbSBarry Smith starts[0] = 0; 1611eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1621eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1631eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1641eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1651eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1661eb62cbbSBarry Smith } 16778b31e54SBarry Smith PETSCFREE(owner); 1681eb62cbbSBarry Smith starts[0] = 0; 1691eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1701eb62cbbSBarry Smith count = 0; 1711eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1721eb62cbbSBarry Smith if (procs[i]) { 173c60448a5SBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1741eb62cbbSBarry Smith comm,send_waits+count++); 1751eb62cbbSBarry Smith } 1761eb62cbbSBarry Smith } 17778b31e54SBarry Smith PETSCFREE(starts); PETSCFREE(nprocs); 1781eb62cbbSBarry Smith 1791eb62cbbSBarry Smith /* Free cache space */ 18078b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1831eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1841eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1851eb62cbbSBarry Smith aij->rmax = nmax; 1861eb62cbbSBarry Smith 1871eb62cbbSBarry Smith return 0; 1881eb62cbbSBarry Smith } 18944a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 1901eb62cbbSBarry Smith 191fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 1921eb62cbbSBarry Smith { 1931eb62cbbSBarry Smith int ierr; 19444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1951eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 1966abc6512SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 1972493cbb0SBarry Smith int row,col,other_disassembled; 1981eb62cbbSBarry Smith Scalar *values,val; 1991eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2001eb62cbbSBarry Smith 2011eb62cbbSBarry Smith /* wait on receives */ 2021eb62cbbSBarry Smith while (count) { 203d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2041eb62cbbSBarry Smith /* unpack receives into our local space */ 205d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 206c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2071eb62cbbSBarry Smith n = n/3; 2081eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2091eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2101eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2111eb62cbbSBarry Smith val = values[3*i+2]; 2121eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2131eb62cbbSBarry Smith col -= aij->cstart; 2141eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2151eb62cbbSBarry Smith } 2161eb62cbbSBarry Smith else { 217d6dfbf8fSBarry Smith if (aij->assembled) { 218b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 2199e25ed09SBarry Smith col = aij->colmap[col] - 1; 220b7c46309SBarry Smith if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 2212493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2222493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 223d6dfbf8fSBarry Smith } 2249e25ed09SBarry Smith } 2251eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2261eb62cbbSBarry Smith } 2271eb62cbbSBarry Smith } 2281eb62cbbSBarry Smith count--; 2291eb62cbbSBarry Smith } 23078b31e54SBarry Smith PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 2311eb62cbbSBarry Smith 2321eb62cbbSBarry Smith /* wait on sends */ 2331eb62cbbSBarry Smith if (aij->nsends) { 23478b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) ); 23578b31e54SBarry Smith CHKPTRQ(send_status); 2361eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 23778b31e54SBarry Smith PETSCFREE(send_status); 2381eb62cbbSBarry Smith } 23978b31e54SBarry Smith PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 2401eb62cbbSBarry Smith 24107a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 24278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 24378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2441eb62cbbSBarry Smith 2452493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2462493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 2472493cbb0SBarry Smith MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1, 2482493cbb0SBarry Smith MPI_INT,MPI_PROD,mat->comm); 2492493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2502493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2512493cbb0SBarry Smith } 2522493cbb0SBarry Smith 2535e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 25478b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 255d6dfbf8fSBarry Smith aij->assembled = 1; 2565e42470aSBarry Smith } 25778b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 25878b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2595e42470aSBarry Smith 2608a729477SBarry Smith return 0; 2618a729477SBarry Smith } 2628a729477SBarry Smith 2632ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2641eb62cbbSBarry Smith { 26544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 266dbd7a890SLois Curfman McInnes int ierr; 26778b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 26878b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2691eb62cbbSBarry Smith return 0; 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith 2721eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2731eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2741eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2771eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2781eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2791eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2801eb62cbbSBarry Smith routine. 2811eb62cbbSBarry Smith */ 28244a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 2831eb62cbbSBarry Smith { 28444a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 2851eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2866abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 2871eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2885392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 289d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 290d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2911eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2921eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2931eb62cbbSBarry Smith IS istmp; 2941eb62cbbSBarry Smith 295c8f67822SLois Curfman McInnes if (!l->assembled) 29678b31e54SBarry Smith SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first"); 29778b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 29878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2991eb62cbbSBarry Smith 3001eb62cbbSBarry Smith /* first count number of contributors to each processor */ 30178b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 30278b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 30378b31e54SBarry Smith owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3041eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3051eb62cbbSBarry Smith idx = rows[i]; 3061eb62cbbSBarry Smith found = 0; 3071eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 3081eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3091eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3101eb62cbbSBarry Smith } 3111eb62cbbSBarry Smith } 31278b31e54SBarry Smith if (!found) SETERRQ(1,"Index out of range."); 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3151eb62cbbSBarry Smith 3161eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 31778b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 3181eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3191eb62cbbSBarry Smith nrecvs = work[mytid]; 3201eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3211eb62cbbSBarry Smith nmax = work[mytid]; 32278b31e54SBarry Smith PETSCFREE(work); 3231eb62cbbSBarry Smith 3241eb62cbbSBarry Smith /* post receives: */ 32578b31e54SBarry Smith rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 32678b31e54SBarry Smith CHKPTRQ(rvalues); 32778b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 32878b31e54SBarry Smith CHKPTRQ(recv_waits); 3291eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3301eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3311eb62cbbSBarry Smith comm,recv_waits+i); 3321eb62cbbSBarry Smith } 3331eb62cbbSBarry Smith 3341eb62cbbSBarry Smith /* do sends: 3351eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3361eb62cbbSBarry Smith the ith processor 3371eb62cbbSBarry Smith */ 33878b31e54SBarry Smith svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 33978b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 34078b31e54SBarry Smith CHKPTRQ(send_waits); 34178b31e54SBarry Smith starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 3421eb62cbbSBarry Smith starts[0] = 0; 3431eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3441eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3451eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3461eb62cbbSBarry Smith } 3471eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3481eb62cbbSBarry Smith 3491eb62cbbSBarry Smith starts[0] = 0; 3501eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3511eb62cbbSBarry Smith count = 0; 3521eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3531eb62cbbSBarry Smith if (procs[i]) { 3541eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3551eb62cbbSBarry Smith comm,send_waits+count++); 3561eb62cbbSBarry Smith } 3571eb62cbbSBarry Smith } 35878b31e54SBarry Smith PETSCFREE(starts); 3591eb62cbbSBarry Smith 3601eb62cbbSBarry Smith base = owners[mytid]; 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* wait on receives */ 36378b31e54SBarry Smith lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3641eb62cbbSBarry Smith source = lens + nrecvs; 3651eb62cbbSBarry Smith count = nrecvs; slen = 0; 3661eb62cbbSBarry Smith while (count) { 367d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3681eb62cbbSBarry Smith /* unpack receives into our local space */ 3691eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 370d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 371d6dfbf8fSBarry Smith lens[imdex] = n; 3721eb62cbbSBarry Smith slen += n; 3731eb62cbbSBarry Smith count--; 3741eb62cbbSBarry Smith } 37578b31e54SBarry Smith PETSCFREE(recv_waits); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith /* move the data into the send scatter */ 37878b31e54SBarry Smith lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3791eb62cbbSBarry Smith count = 0; 3801eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3811eb62cbbSBarry Smith values = rvalues + i*nmax; 3821eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3831eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3841eb62cbbSBarry Smith } 3851eb62cbbSBarry Smith } 38678b31e54SBarry Smith PETSCFREE(rvalues); PETSCFREE(lens); 38778b31e54SBarry Smith PETSCFREE(owner); PETSCFREE(nprocs); 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* actually zap the local rows */ 3906b5873e3SBarry Smith ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp); 39178b31e54SBarry Smith CHKERRQ(ierr); PETSCFREE(lrows); 39278b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 39378b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 39478b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* wait on sends */ 3971eb62cbbSBarry Smith if (nsends) { 39878b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 39978b31e54SBarry Smith CHKPTRQ(send_status); 4001eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 40178b31e54SBarry Smith PETSCFREE(send_status); 4021eb62cbbSBarry Smith } 40378b31e54SBarry Smith PETSCFREE(send_waits); PETSCFREE(svalues); 4041eb62cbbSBarry Smith 4051eb62cbbSBarry Smith 4061eb62cbbSBarry Smith return 0; 4071eb62cbbSBarry Smith } 4081eb62cbbSBarry Smith 40944a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 4101eb62cbbSBarry Smith { 41144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 4121eb62cbbSBarry Smith int ierr; 41378b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first"); 41406be10caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 41578b31e54SBarry Smith CHKERRQ(ierr); 41678b31e54SBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 41706be10caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 41878b31e54SBarry Smith CHKERRQ(ierr); 41978b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 4201eb62cbbSBarry Smith return 0; 4211eb62cbbSBarry Smith } 4221eb62cbbSBarry Smith 42344a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 424da3a660dSBarry Smith { 42544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 426da3a660dSBarry Smith int ierr; 42778b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first"); 42806be10caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 42978b31e54SBarry Smith CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 43106be10caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 43278b31e54SBarry Smith CHKERRQ(ierr); 43378b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 434da3a660dSBarry Smith return 0; 435da3a660dSBarry Smith } 436da3a660dSBarry Smith 43744a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 438da3a660dSBarry Smith { 43944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 440da3a660dSBarry Smith int ierr; 441da3a660dSBarry Smith 44244a69424SLois Curfman McInnes if (!aij->assembled) 44378b31e54SBarry Smith SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first"); 444da3a660dSBarry Smith /* do nondiagonal part */ 44578b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 446da3a660dSBarry Smith /* send it on its way */ 44706be10caSBarry Smith ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES, 44878b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 449da3a660dSBarry Smith /* do local part */ 45078b31e54SBarry Smith ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 451da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 452da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 453da3a660dSBarry Smith /* but is not perhaps always true. */ 45406be10caSBarry Smith ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES, 45578b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 456da3a660dSBarry Smith return 0; 457da3a660dSBarry Smith } 458da3a660dSBarry Smith 45944a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 460da3a660dSBarry Smith { 46144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 462da3a660dSBarry Smith int ierr; 463da3a660dSBarry Smith 46444a69424SLois Curfman McInnes if (!aij->assembled) 46578b31e54SBarry Smith SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first"); 466da3a660dSBarry Smith /* do nondiagonal part */ 46778b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 468da3a660dSBarry Smith /* send it on its way */ 46906be10caSBarry Smith ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES, 47078b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 471da3a660dSBarry Smith /* do local part */ 47278b31e54SBarry Smith ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 473da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 474da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 475da3a660dSBarry Smith /* but is not perhaps always true. */ 47606be10caSBarry Smith ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES, 47778b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 478da3a660dSBarry Smith return 0; 479da3a660dSBarry Smith } 480da3a660dSBarry Smith 4811eb62cbbSBarry Smith /* 4821eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4831eb62cbbSBarry Smith diagonal block 4841eb62cbbSBarry Smith */ 485d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 4861eb62cbbSBarry Smith { 48744a69424SLois Curfman McInnes Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 48878b31e54SBarry Smith if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first"); 4891eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4901eb62cbbSBarry Smith } 4911eb62cbbSBarry Smith 49244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 4931eb62cbbSBarry Smith { 4941eb62cbbSBarry Smith Mat mat = (Mat) obj; 49544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4961eb62cbbSBarry Smith int ierr; 497a5a9c739SBarry Smith #if defined(PETSC_LOG) 498a5a9c739SBarry Smith PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 499a5a9c739SBarry Smith #endif 50078b31e54SBarry Smith PETSCFREE(aij->rowners); 50178b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 50278b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 50378b31e54SBarry Smith if (aij->colmap) PETSCFREE(aij->colmap); 50478b31e54SBarry Smith if (aij->garray) PETSCFREE(aij->garray); 5051eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 5061eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 50778b31e54SBarry Smith PETSCFREE(aij); 508a5a9c739SBarry Smith PLogObjectDestroy(mat); 509a5a9c739SBarry Smith PETSCHEADERDESTROY(mat); 5101eb62cbbSBarry Smith return 0; 5111eb62cbbSBarry Smith } 5127857610eSBarry Smith #include "draw.h" 513ee50ffe9SBarry Smith #include "pviewer.h" 514ee50ffe9SBarry Smith 51544a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 5161eb62cbbSBarry Smith { 5171eb62cbbSBarry Smith Mat mat = (Mat) obj; 51844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5191eb62cbbSBarry Smith int ierr; 520d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 5211eb62cbbSBarry Smith 52278b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first"); 523154123eaSLois Curfman McInnes if (!viewer) { /* so that viewers may be used from debuggers */ 524154123eaSLois Curfman McInnes viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer; 525154123eaSLois Curfman McInnes } 526ab254492SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 5277857610eSBarry Smith if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) { 5284a0ce102SLois Curfman McInnes FILE *fd = ViewerFileGetPointer_Private(viewer); 5297f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 530d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 5311eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5321eb62cbbSBarry Smith aij->cend); 53378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 53478b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 535d13ab20cSBarry Smith fflush(fd); 5367f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 537d13ab20cSBarry Smith } 5387857610eSBarry Smith else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) || 5397857610eSBarry Smith vobj->cookie == DRAW_COOKIE) { 54095373324SBarry Smith int numtids = aij->numtids, mytid = aij->mytid; 54195373324SBarry Smith if (numtids == 1) { 54278b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 54395373324SBarry Smith } 54495373324SBarry Smith else { 54595373324SBarry Smith /* assemble the entire matrix onto first processor. */ 54695373324SBarry Smith Mat A; 5472ee70a88SLois Curfman McInnes Mat_AIJ *Aloc; 5482eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 54995373324SBarry Smith Scalar *a; 5502ee70a88SLois Curfman McInnes 55195373324SBarry Smith if (!mytid) { 55295373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 55395373324SBarry Smith } 55495373324SBarry Smith else { 55595373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 55695373324SBarry Smith } 55778b31e54SBarry Smith CHKERRQ(ierr); 55895373324SBarry Smith 55995373324SBarry Smith /* copy over the A part */ 5602ee70a88SLois Curfman McInnes Aloc = (Mat_AIJ*) aij->A->data; 5612ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 56295373324SBarry Smith row = aij->rstart; 563b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;} 56495373324SBarry Smith for ( i=0; i<m; i++ ) { 56507a0e7adSLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES); 56678b31e54SBarry Smith CHKERRQ(ierr); 56795373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 56895373324SBarry Smith } 5692ee70a88SLois Curfman McInnes aj = Aloc->j; 570b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;} 57195373324SBarry Smith 57295373324SBarry Smith /* copy over the B part */ 5732ee70a88SLois Curfman McInnes Aloc = (Mat_AIJ*) aij->B->data; 5742ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 57595373324SBarry Smith row = aij->rstart; 57678b31e54SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 577b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];} 57895373324SBarry Smith for ( i=0; i<m; i++ ) { 57907a0e7adSLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES); 58078b31e54SBarry Smith CHKERRQ(ierr); 58195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 58295373324SBarry Smith } 58378b31e54SBarry Smith PETSCFREE(ct); 58478b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58578b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58695373324SBarry Smith if (!mytid) { 58778b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 58895373324SBarry Smith } 58978b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 59095373324SBarry Smith } 59195373324SBarry Smith } 5921eb62cbbSBarry Smith return 0; 5931eb62cbbSBarry Smith } 5941eb62cbbSBarry Smith 595d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ *); 5961eb62cbbSBarry Smith /* 5971eb62cbbSBarry Smith This has to provide several versions. 5981eb62cbbSBarry Smith 5991eb62cbbSBarry Smith 1) per sequential 6001eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6011eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 602d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6031eb62cbbSBarry Smith */ 604fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 605fc76ce87SLois Curfman McInnes double shift,int its,Vec xx) 6068a729477SBarry Smith { 60744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 608d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 60944a69424SLois Curfman McInnes Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data; 6106abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6116abc6512SBarry Smith int ierr,*idx, *diag; 6126abc6512SBarry Smith int n = mat->n, m = mat->m, i; 613da3a660dSBarry Smith Vec tt; 6148a729477SBarry Smith 61578b31e54SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first"); 6161eb62cbbSBarry Smith 617d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 618d6dfbf8fSBarry Smith xs = x -1; /* shift by one for index start of 1 */ 619da3a660dSBarry Smith ls--; 620d3c9fed9SLois Curfman McInnes if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;} 621d6dfbf8fSBarry Smith diag = A->diag; 622acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 62378b31e54SBarry Smith SETERRQ(1,"That option not yet support for parallel AIJ matrices"); 624acb40c82SBarry Smith } 625da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 626da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 627da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 628da3a660dSBarry Smith 629da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 630da3a660dSBarry Smith 631da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 632da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 633da3a660dSBarry Smith is the relaxation factor. 634da3a660dSBarry Smith */ 63578b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 636da3a660dSBarry Smith VecGetArray(tt,&t); 637da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 638da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 639da3a660dSBarry Smith VecSet(&zero,mat->lvec); 64006be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 64178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 642da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 643da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 644da3a660dSBarry Smith idx = A->j + diag[i]; 645da3a660dSBarry Smith v = A->a + diag[i]; 646da3a660dSBarry Smith sum = b[i]; 647da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 648da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 649da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 650da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 651da3a660dSBarry Smith v = B->a + B->i[i] - 1; 652da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 653da3a660dSBarry Smith x[i] = omega*(sum/d); 654da3a660dSBarry Smith } 655edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 65678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 657da3a660dSBarry Smith 658da3a660dSBarry Smith /* t = b - (2*E - D)x */ 659da3a660dSBarry Smith v = A->a; 660da3a660dSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 661da3a660dSBarry Smith 662da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 663da3a660dSBarry Smith ts = t - 1; /* shifted by one for index start of a or mat->j*/ 664da3a660dSBarry Smith diag = A->diag; 665da3a660dSBarry Smith VecSet(&zero,mat->lvec); 66606be10caSBarry Smith ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 66778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 668da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 669da3a660dSBarry Smith n = diag[i] - A->i[i]; 670da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 671da3a660dSBarry Smith v = A->a + A->i[i] - 1; 672da3a660dSBarry Smith sum = t[i]; 673da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 674da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 675da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 676da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 677da3a660dSBarry Smith v = B->a + B->i[i] - 1; 678da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 679da3a660dSBarry Smith t[i] = omega*(sum/d); 680da3a660dSBarry Smith } 681edd2f0e1SBarry Smith ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 68278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 683da3a660dSBarry Smith /* x = x + t */ 684da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 685da3a660dSBarry Smith VecDestroy(tt); 686da3a660dSBarry Smith return 0; 687da3a660dSBarry Smith } 688da3a660dSBarry Smith 6891eb62cbbSBarry Smith 690d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 691da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 692da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 693da3a660dSBarry Smith } 694da3a660dSBarry Smith else { 69506be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 69678b31e54SBarry Smith CHKERRQ(ierr); 69706be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 69878b31e54SBarry Smith CHKERRQ(ierr); 699da3a660dSBarry Smith } 700d6dfbf8fSBarry Smith while (its--) { 701d6dfbf8fSBarry Smith /* go down through the rows */ 70206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 70378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 704d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 705d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 706d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 707d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 708d6dfbf8fSBarry Smith sum = b[i]; 709d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 710d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 711d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 712d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 713d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 714d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 715d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 716d6dfbf8fSBarry Smith } 717edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 71878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 719d6dfbf8fSBarry Smith /* come up through the rows */ 72006be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 72178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 722d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 723d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 724d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 725d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 726d6dfbf8fSBarry Smith sum = b[i]; 727d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 728d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 729d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 730d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 731d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 732d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 733d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 734d6dfbf8fSBarry Smith } 735edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 73678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 737d6dfbf8fSBarry Smith } 738d6dfbf8fSBarry Smith } 739d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 740da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 741da3a660dSBarry Smith VecSet(&zero,mat->lvec); 74206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 74378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 744da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 745da3a660dSBarry Smith n = diag[i] - A->i[i]; 746da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 747da3a660dSBarry Smith v = A->a + A->i[i] - 1; 748da3a660dSBarry Smith sum = b[i]; 749da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 750da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 751da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 752da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 753da3a660dSBarry Smith v = B->a + B->i[i] - 1; 754da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 755da3a660dSBarry Smith x[i] = omega*(sum/d); 756da3a660dSBarry Smith } 757edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 75878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 759da3a660dSBarry Smith its--; 760da3a660dSBarry Smith } 761d6dfbf8fSBarry Smith while (its--) { 76206be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 76378b31e54SBarry Smith CHKERRQ(ierr); 76406be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 76578b31e54SBarry Smith CHKERRQ(ierr); 76606be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 769d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 770d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 771d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 772d6dfbf8fSBarry Smith sum = b[i]; 773d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 774d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 775d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 776d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 777d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 778d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 779d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 780d6dfbf8fSBarry Smith } 781edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 78278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 783d6dfbf8fSBarry Smith } 784d6dfbf8fSBarry Smith } 785d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 786da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 787da3a660dSBarry Smith VecSet(&zero,mat->lvec); 78806be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 78978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 790da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 791da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 792da3a660dSBarry Smith idx = A->j + diag[i]; 793da3a660dSBarry Smith v = A->a + diag[i]; 794da3a660dSBarry Smith sum = b[i]; 795da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 796da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 797da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 798da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 799da3a660dSBarry Smith v = B->a + B->i[i] - 1; 800da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 801da3a660dSBarry Smith x[i] = omega*(sum/d); 802da3a660dSBarry Smith } 803edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 80478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 805da3a660dSBarry Smith its--; 806da3a660dSBarry Smith } 807d6dfbf8fSBarry Smith while (its--) { 80806be10caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 80978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 81006be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 81178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 81206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 81378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 814d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 815d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 816d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 817d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 818d6dfbf8fSBarry Smith sum = b[i]; 819d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 820d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 821d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 822d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 823d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 824d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 825d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 826d6dfbf8fSBarry Smith } 827edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829d6dfbf8fSBarry Smith } 830d6dfbf8fSBarry Smith } 831d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 832da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 833da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 834da3a660dSBarry Smith } 83506be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 83678b31e54SBarry Smith CHKERRQ(ierr); 83706be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 83878b31e54SBarry Smith CHKERRQ(ierr); 839d6dfbf8fSBarry Smith while (its--) { 840d6dfbf8fSBarry Smith /* go down through the rows */ 841d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 842d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 843d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 844d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 845d6dfbf8fSBarry Smith sum = b[i]; 846d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 847d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 848d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 849d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 850d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 851d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 852d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 853d6dfbf8fSBarry Smith } 854d6dfbf8fSBarry Smith /* come up through the rows */ 855d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 856d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 857d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 858d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 859d6dfbf8fSBarry Smith sum = b[i]; 860d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 861d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 862d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 863d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 864d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 865d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 866d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 867d6dfbf8fSBarry Smith } 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith } 870d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 871da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 872da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 873da3a660dSBarry Smith } 87406be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 87578b31e54SBarry Smith CHKERRQ(ierr); 87606be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 87778b31e54SBarry Smith CHKERRQ(ierr); 878d6dfbf8fSBarry Smith while (its--) { 879d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 880d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 881d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 882d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 883d6dfbf8fSBarry Smith sum = b[i]; 884d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 885d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 886d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 887d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 888d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 889d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 890d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 891d6dfbf8fSBarry Smith } 892d6dfbf8fSBarry Smith } 893d6dfbf8fSBarry Smith } 894d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 895da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 896da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 897da3a660dSBarry Smith } 89806be10caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL, 89978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 90006be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL, 90178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 902d6dfbf8fSBarry Smith while (its--) { 903d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 904d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 905d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 906d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 907d6dfbf8fSBarry Smith sum = b[i]; 908d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 909d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 910d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 911d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 912d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 913d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 914d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 915d6dfbf8fSBarry Smith } 916d6dfbf8fSBarry Smith } 917d6dfbf8fSBarry Smith } 9188a729477SBarry Smith return 0; 9198a729477SBarry Smith } 920a66be287SLois Curfman McInnes 921fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 922fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 923a66be287SLois Curfman McInnes { 924a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 925a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 926a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 927a66be287SLois Curfman McInnes 92878b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 92978b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 930a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 931a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 932a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 933a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 934a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm); 935a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 936a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 937a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm); 938a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 939a66be287SLois Curfman McInnes } 940a66be287SLois Curfman McInnes return 0; 941a66be287SLois Curfman McInnes } 942a66be287SLois Curfman McInnes 943299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 944299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 945299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 946299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 947299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 948299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 949299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 950299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 951299609e3SLois Curfman McInnes 952fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op) 953c74985f6SBarry Smith { 95444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 955c74985f6SBarry Smith 956c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 957c74985f6SBarry Smith MatSetOption(aij->A,op); 958c74985f6SBarry Smith MatSetOption(aij->B,op); 959c74985f6SBarry Smith } 960c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 961c74985f6SBarry Smith MatSetOption(aij->A,op); 962c74985f6SBarry Smith MatSetOption(aij->B,op); 963c74985f6SBarry Smith } 96478b31e54SBarry Smith else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported"); 965c74985f6SBarry Smith return 0; 966c74985f6SBarry Smith } 967c74985f6SBarry Smith 968d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 969c74985f6SBarry Smith { 97044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 971c74985f6SBarry Smith *m = mat->M; *n = mat->N; 972c74985f6SBarry Smith return 0; 973c74985f6SBarry Smith } 974c74985f6SBarry Smith 975d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 976c74985f6SBarry Smith { 97744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 978b7c46309SBarry Smith *m = mat->m; *n = mat->N; 979c74985f6SBarry Smith return 0; 980c74985f6SBarry Smith } 981c74985f6SBarry Smith 982d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 983c74985f6SBarry Smith { 98444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 985c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 986c74985f6SBarry Smith return 0; 987c74985f6SBarry Smith } 988c74985f6SBarry Smith 98939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 99039e00950SLois Curfman McInnes { 991154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 992154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 993154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 994154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 99539e00950SLois Curfman McInnes 99639e00950SLois Curfman McInnes if (row < rstart || row >= rend) 99778b31e54SBarry Smith SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.") 998abc0e9e4SLois Curfman McInnes lrow = row - rstart; 99939e00950SLois Curfman McInnes 1000154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1001154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1002154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 100378b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 100478b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1005154123eaSLois Curfman McInnes nztot = nzA + nzB; 1006154123eaSLois Curfman McInnes 1007154123eaSLois Curfman McInnes if (v || idx) { 1008154123eaSLois Curfman McInnes if (nztot) { 1009154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1010299609e3SLois Curfman McInnes int imark; 101148b35521SBarry Smith if (mat->assembled) { 1012154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 101348b35521SBarry Smith } 1014154123eaSLois Curfman McInnes if (v) { 101578b31e54SBarry Smith *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 101639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1017154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1018154123eaSLois Curfman McInnes else break; 1019154123eaSLois Curfman McInnes } 1020154123eaSLois Curfman McInnes imark = i; 1021154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1022299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1023154123eaSLois Curfman McInnes } 1024154123eaSLois Curfman McInnes if (idx) { 102578b31e54SBarry Smith *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1026154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1027154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1028154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1029154123eaSLois Curfman McInnes else break; 1030154123eaSLois Curfman McInnes } 1031154123eaSLois Curfman McInnes imark = i; 1032154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1033299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 103439e00950SLois Curfman McInnes } 103539e00950SLois Curfman McInnes } 103639e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1037154123eaSLois Curfman McInnes } 103839e00950SLois Curfman McInnes *nz = nztot; 103978b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 104078b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 104139e00950SLois Curfman McInnes return 0; 104239e00950SLois Curfman McInnes } 104339e00950SLois Curfman McInnes 104439e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 104539e00950SLois Curfman McInnes { 104678b31e54SBarry Smith if (idx) PETSCFREE(*idx); 104778b31e54SBarry Smith if (v) PETSCFREE(*v); 104839e00950SLois Curfman McInnes return 0; 104939e00950SLois Curfman McInnes } 105039e00950SLois Curfman McInnes 1051b7c46309SBarry Smith static int MatTranspose_MPIAIJ(Mat A,Mat *Bin) 1052b7c46309SBarry Smith { 1053b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1054b7c46309SBarry Smith int ierr; 1055b7c46309SBarry Smith Mat B; 1056b7c46309SBarry Smith Mat_AIJ *Aloc; 1057b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1058b7c46309SBarry Smith Scalar *array; 1059b7c46309SBarry Smith 1060b7c46309SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1061b7c46309SBarry Smith CHKERRQ(ierr); 1062b7c46309SBarry Smith 1063b7c46309SBarry Smith /* copy over the A part */ 1064b7c46309SBarry Smith Aloc = (Mat_AIJ*) a->A->data; 1065b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1066b7c46309SBarry Smith row = a->rstart; 1067b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;} 1068b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1069b7c46309SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES); 1070b7c46309SBarry Smith CHKERRQ(ierr); 1071b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1072b7c46309SBarry Smith } 1073b7c46309SBarry Smith aj = Aloc->j; 1074b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;} 1075b7c46309SBarry Smith 1076b7c46309SBarry Smith /* copy over the B part */ 1077b7c46309SBarry Smith Aloc = (Mat_AIJ*) a->B->data; 1078b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1079b7c46309SBarry Smith row = a->rstart; 1080b7c46309SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1081b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];} 1082b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1083b7c46309SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES); 1084b7c46309SBarry Smith CHKERRQ(ierr); 1085b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1086b7c46309SBarry Smith } 1087b7c46309SBarry Smith PETSCFREE(ct); 1088b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1089b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1090b7c46309SBarry Smith *Bin = B; 1091b7c46309SBarry Smith return 0; 1092b7c46309SBarry Smith } 1093b7c46309SBarry Smith 1094fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1095ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1096d6dfbf8fSBarry Smith 10978a729477SBarry Smith /* -------------------------------------------------------------------*/ 10982ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 109939e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 110044a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 110144a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1102299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1103299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1104299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 110544a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1106b7c46309SBarry Smith MatTranspose_MPIAIJ, 1107a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1108d1710a03SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,0, 1109ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11101eb62cbbSBarry Smith 0, 1111299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1112299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1113299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1114d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1115299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1116ff7509f2SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 11178a729477SBarry Smith 11188a729477SBarry Smith /*@ 1119ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1120ff756334SLois Curfman McInnes (the default parallel PETSc format). 11218a729477SBarry Smith 11228a729477SBarry Smith Input Parameters: 11231eb62cbbSBarry Smith . comm - MPI communicator 11247d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 11257d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 11267d3e4905SLois Curfman McInnes if N is given) 11277d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 11287d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 11297d3e4905SLois Curfman McInnes if n is given) 1130ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1131ff756334SLois Curfman McInnes (same for all local rows) 1132ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1133ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1134ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1135ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1136ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1137ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1138ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 11398a729477SBarry Smith 1140ff756334SLois Curfman McInnes Output Parameter: 11418a729477SBarry Smith . newmat - the matrix 11428a729477SBarry Smith 1143ff756334SLois Curfman McInnes Notes: 1144ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1145ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 1146ff756334SLois Curfman McInnes storage. That is, the stored row and column indices begin at 1147ab693e5aSLois Curfman McInnes one, not zero. See the users manual for further details. 1148ff756334SLois Curfman McInnes 1149ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1150ff756334SLois Curfman McInnes (possibly both). 1151ff756334SLois Curfman McInnes 1152e0245417SLois Curfman McInnes Storage Information: 1153e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1154e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1155e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1156e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1157e0245417SLois Curfman McInnes 1158e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1159e0245417SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1160e0245417SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1161e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1162e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1163ff756334SLois Curfman McInnes 1164dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1165ff756334SLois Curfman McInnes 1166dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues() 11678a729477SBarry Smith @*/ 11681eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 11691eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 11708a729477SBarry Smith { 11718a729477SBarry Smith Mat mat; 117244a69424SLois Curfman McInnes Mat_MPIAIJ *aij; 11736abc6512SBarry Smith int ierr, i,sum[2],work[2]; 11748a729477SBarry Smith *newmat = 0; 117544a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1176a5a9c739SBarry Smith PLogObjectCreate(mat); 117778b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 11788a729477SBarry Smith mat->ops = &MatOps; 117944a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 118044a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 11818a729477SBarry Smith mat->factor = 0; 1182d6dfbf8fSBarry Smith 118307a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 11841eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 11851eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 11861eb62cbbSBarry Smith 1187dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 11881eb62cbbSBarry Smith work[0] = m; work[1] = n; 1189d6dfbf8fSBarry Smith MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1190dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1191dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 11921eb62cbbSBarry Smith } 1193dbd7a890SLois Curfman McInnes if (m == PETSC_DECIDE) 1194dbd7a890SLois Curfman McInnes {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1195dbd7a890SLois Curfman McInnes if (n == PETSC_DECIDE) 1196dbd7a890SLois Curfman McInnes {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 11978a729477SBarry Smith aij->m = m; 11988a729477SBarry Smith aij->n = n; 11991eb62cbbSBarry Smith aij->N = N; 12001eb62cbbSBarry Smith aij->M = M; 12011eb62cbbSBarry Smith 12021eb62cbbSBarry Smith /* build local table of row and column ownerships */ 120378b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 120478b31e54SBarry Smith CHKPTRQ(aij->rowners); 12051eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 12061eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 12071eb62cbbSBarry Smith aij->rowners[0] = 0; 12081eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 12091eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 12108a729477SBarry Smith } 12111eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 12121eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 12131eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 12141eb62cbbSBarry Smith aij->cowners[0] = 0; 12151eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 12161eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 12178a729477SBarry Smith } 12181eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 12191eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 12208a729477SBarry Smith 12218a729477SBarry Smith 12226b5873e3SBarry Smith ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 122378b31e54SBarry Smith CHKERRQ(ierr); 1224a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 12256b5873e3SBarry Smith ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 122678b31e54SBarry Smith CHKERRQ(ierr); 1227a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 12288a729477SBarry Smith 12291eb62cbbSBarry Smith /* build cache for off array entries formed */ 123078b31e54SBarry Smith ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 12319e25ed09SBarry Smith aij->colmap = 0; 12329e25ed09SBarry Smith aij->garray = 0; 12338a729477SBarry Smith 12341eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 12351eb62cbbSBarry Smith aij->lvec = 0; 12361eb62cbbSBarry Smith aij->Mvctx = 0; 1237d6dfbf8fSBarry Smith aij->assembled = 0; 12388a729477SBarry Smith 1239d6dfbf8fSBarry Smith *newmat = mat; 1240d6dfbf8fSBarry Smith return 0; 1241d6dfbf8fSBarry Smith } 1242c74985f6SBarry Smith 1243ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1244d6dfbf8fSBarry Smith { 1245d6dfbf8fSBarry Smith Mat mat; 124644a69424SLois Curfman McInnes Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 12472ee70a88SLois Curfman McInnes int ierr, len; 1248d6dfbf8fSBarry Smith *newmat = 0; 1249d6dfbf8fSBarry Smith 125078b31e54SBarry Smith if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix"); 125144a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1252a5a9c739SBarry Smith PLogObjectCreate(mat); 125378b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1254d6dfbf8fSBarry Smith mat->ops = &MatOps; 125544a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 125644a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1257d6dfbf8fSBarry Smith mat->factor = matin->factor; 1258d6dfbf8fSBarry Smith 1259d6dfbf8fSBarry Smith aij->m = oldmat->m; 1260d6dfbf8fSBarry Smith aij->n = oldmat->n; 1261d6dfbf8fSBarry Smith aij->M = oldmat->M; 1262d6dfbf8fSBarry Smith aij->N = oldmat->N; 1263d6dfbf8fSBarry Smith 1264d6dfbf8fSBarry Smith aij->assembled = 1; 1265d6dfbf8fSBarry Smith aij->rstart = oldmat->rstart; 1266d6dfbf8fSBarry Smith aij->rend = oldmat->rend; 1267d6dfbf8fSBarry Smith aij->cstart = oldmat->cstart; 1268d6dfbf8fSBarry Smith aij->cend = oldmat->cend; 1269d6dfbf8fSBarry Smith aij->numtids = oldmat->numtids; 1270d6dfbf8fSBarry Smith aij->mytid = oldmat->mytid; 127107a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 1272d6dfbf8fSBarry Smith 127378b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 127478b31e54SBarry Smith CHKPTRQ(aij->rowners); 127578b31e54SBarry Smith PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 127678b31e54SBarry Smith ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 12772ee70a88SLois Curfman McInnes if (oldmat->colmap) { 127878b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 127978b31e54SBarry Smith CHKPTRQ(aij->colmap); 128078b31e54SBarry Smith PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 12812ee70a88SLois Curfman McInnes } else aij->colmap = 0; 12822ee70a88SLois Curfman McInnes if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) { 128378b31e54SBarry Smith aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 128478b31e54SBarry Smith PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 12852ee70a88SLois Curfman McInnes } else aij->garray = 0; 1286d6dfbf8fSBarry Smith 128778b31e54SBarry Smith ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1288a5a9c739SBarry Smith PLogObjectParent(mat,aij->lvec); 128978b31e54SBarry Smith ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1290a5a9c739SBarry Smith PLogObjectParent(mat,aij->Mvctx); 129178b31e54SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1292a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 129378b31e54SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1294a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 12958a729477SBarry Smith *newmat = mat; 12968a729477SBarry Smith return 0; 12978a729477SBarry Smith } 1298