1cb512458SBarry Smith #ifndef lint 2*b4fd4287SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.101 1995/12/12 22:11:36 curfman Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 392ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 401eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 43dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46dbb450caSBarry Smith int shift = C->indexshift; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 53bbb6d6a8SBarry Smith if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54bbb6d6a8SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 551eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 561eb62cbbSBarry Smith row = idxm[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 58bbb6d6a8SBarry Smith if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59bbb6d6a8SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 601eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 611eb62cbbSBarry Smith col = idxn[j] - cstart; 6278b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 631eb62cbbSBarry Smith } 641eb62cbbSBarry Smith else { 65d6dfbf8fSBarry Smith if (aij->assembled) { 66b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 67dbb450caSBarry Smith col = aij->colmap[idxn[j]] + shift; 68ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 692493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 702493cbb0SBarry Smith col = idxn[j]; 71d6dfbf8fSBarry Smith } 729e25ed09SBarry Smith } 739e25ed09SBarry Smith else col = idxn[j]; 7478b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 751eb62cbbSBarry Smith } 761eb62cbbSBarry Smith } 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith else { 79416022c9SBarry Smith ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 801eb62cbbSBarry Smith } 818a729477SBarry Smith } 828a729477SBarry Smith return 0; 838a729477SBarry Smith } 848a729477SBarry Smith 85b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 86b49de8d1SLois Curfman McInnes { 87b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 88b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 89b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 90b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 91b49de8d1SLois Curfman McInnes int shift = C->indexshift; 92b49de8d1SLois Curfman McInnes 93b49de8d1SLois Curfman McInnes if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix"); 94b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 95b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 96b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 97b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 98b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 99b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 100b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 101b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 102b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 103b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 104b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 105b49de8d1SLois Curfman McInnes } 106b49de8d1SLois Curfman McInnes else { 107b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 108b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 109b49de8d1SLois Curfman McInnes } 110b49de8d1SLois Curfman McInnes } 111b49de8d1SLois Curfman McInnes } 112b49de8d1SLois Curfman McInnes else { 113b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes } 116b49de8d1SLois Curfman McInnes return 0; 117b49de8d1SLois Curfman McInnes } 118b49de8d1SLois Curfman McInnes 119fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1208a729477SBarry Smith { 12144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 122d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 12317699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 12417699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1251eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1266abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1271eb62cbbSBarry Smith InsertMode addv; 1281eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1291eb62cbbSBarry Smith 1301eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 131d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 132dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 133bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1341eb62cbbSBarry Smith } 1351eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1361eb62cbbSBarry Smith 1371eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1380452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 139cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1400452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1411eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1421eb62cbbSBarry Smith idx = aij->stash.idx[i]; 14317699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1441eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1451eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1468a729477SBarry Smith } 1478a729477SBarry Smith } 1488a729477SBarry Smith } 14917699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1501eb62cbbSBarry Smith 1511eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1520452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 15317699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 15417699dbbSLois Curfman McInnes nreceives = work[rank]; 15517699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 15617699dbbSLois Curfman McInnes nmax = work[rank]; 1570452661fSBarry Smith PetscFree(work); 1581eb62cbbSBarry Smith 1591eb62cbbSBarry Smith /* post receives: 1601eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1611eb62cbbSBarry Smith (global index,value) we store the global index as a double 162d6dfbf8fSBarry Smith to simplify the message passing. 1631eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1641eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1651eb62cbbSBarry Smith this is a lot of wasted space. 1661eb62cbbSBarry Smith 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith This could be done better. 1691eb62cbbSBarry Smith */ 1700452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 17178b31e54SBarry Smith CHKPTRQ(rvalues); 1720452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 17378b31e54SBarry Smith CHKPTRQ(recv_waits); 1741eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 175416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1761eb62cbbSBarry Smith comm,recv_waits+i); 1771eb62cbbSBarry Smith } 1781eb62cbbSBarry Smith 1791eb62cbbSBarry Smith /* do sends: 1801eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1811eb62cbbSBarry Smith the ith processor 1821eb62cbbSBarry Smith */ 1830452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1840452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 18578b31e54SBarry Smith CHKPTRQ(send_waits); 1860452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1871eb62cbbSBarry Smith starts[0] = 0; 18817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1891eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1901eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1911eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1921eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1931eb62cbbSBarry Smith } 1940452661fSBarry Smith PetscFree(owner); 1951eb62cbbSBarry Smith starts[0] = 0; 19617699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1971eb62cbbSBarry Smith count = 0; 19817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1991eb62cbbSBarry Smith if (procs[i]) { 200416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2011eb62cbbSBarry Smith comm,send_waits+count++); 2021eb62cbbSBarry Smith } 2031eb62cbbSBarry Smith } 2040452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2051eb62cbbSBarry Smith 2061eb62cbbSBarry Smith /* Free cache space */ 20778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2081eb62cbbSBarry Smith 2091eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2101eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2111eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2121eb62cbbSBarry Smith aij->rmax = nmax; 2131eb62cbbSBarry Smith 2141eb62cbbSBarry Smith return 0; 2151eb62cbbSBarry Smith } 21644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2171eb62cbbSBarry Smith 218fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2191eb62cbbSBarry Smith { 22044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 221dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2221eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 223416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 224416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2251eb62cbbSBarry Smith Scalar *values,val; 2261eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2271eb62cbbSBarry Smith 2281eb62cbbSBarry Smith /* wait on receives */ 2291eb62cbbSBarry Smith while (count) { 230d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2311eb62cbbSBarry Smith /* unpack receives into our local space */ 232d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 233c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2341eb62cbbSBarry Smith n = n/3; 2351eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2361eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2371eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2381eb62cbbSBarry Smith val = values[3*i+2]; 2391eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2401eb62cbbSBarry Smith col -= aij->cstart; 2411eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2421eb62cbbSBarry Smith } 2431eb62cbbSBarry Smith else { 244d6dfbf8fSBarry Smith if (aij->assembled) { 245b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 246dbb450caSBarry Smith col = aij->colmap[col] + shift; 247ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2482493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2492493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 250d6dfbf8fSBarry Smith } 2519e25ed09SBarry Smith } 2521eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2531eb62cbbSBarry Smith } 2541eb62cbbSBarry Smith } 2551eb62cbbSBarry Smith count--; 2561eb62cbbSBarry Smith } 2570452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2581eb62cbbSBarry Smith 2591eb62cbbSBarry Smith /* wait on sends */ 2601eb62cbbSBarry Smith if (aij->nsends) { 2610452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 26278b31e54SBarry Smith CHKPTRQ(send_status); 2631eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2640452661fSBarry Smith PetscFree(send_status); 2651eb62cbbSBarry Smith } 2660452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2671eb62cbbSBarry Smith 26864119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 26978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 27078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2711eb62cbbSBarry Smith 2722493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2732493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 274416022c9SBarry Smith MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 2752493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2762493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2772493cbb0SBarry Smith } 2782493cbb0SBarry Smith 2795e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 28078b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 281d6dfbf8fSBarry Smith aij->assembled = 1; 2825e42470aSBarry Smith } 28378b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 28478b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2855e42470aSBarry Smith 2868a729477SBarry Smith return 0; 2878a729477SBarry Smith } 2888a729477SBarry Smith 2892ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2901eb62cbbSBarry Smith { 29144a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 292dbd7a890SLois Curfman McInnes int ierr; 29378b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 29478b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2951eb62cbbSBarry Smith return 0; 2961eb62cbbSBarry Smith } 2971eb62cbbSBarry Smith 2981eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2991eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3001eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3011eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3021eb62cbbSBarry Smith routine. 3031eb62cbbSBarry Smith */ 30444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3051eb62cbbSBarry Smith { 30644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 30717699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3086abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 30917699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3105392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 311d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 312d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3131eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3141eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3151eb62cbbSBarry Smith IS istmp; 3161eb62cbbSBarry Smith 31748d91487SBarry Smith if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 31878b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 31978b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3201eb62cbbSBarry Smith 3211eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3220452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3240452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3251eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3261eb62cbbSBarry Smith idx = rows[i]; 3271eb62cbbSBarry Smith found = 0; 32817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3291eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3301eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3311eb62cbbSBarry Smith } 3321eb62cbbSBarry Smith } 333bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3341eb62cbbSBarry Smith } 33517699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3380452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 33917699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34017699dbbSLois Curfman McInnes nrecvs = work[rank]; 34117699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 34217699dbbSLois Curfman McInnes nmax = work[rank]; 3430452661fSBarry Smith PetscFree(work); 3441eb62cbbSBarry Smith 3451eb62cbbSBarry Smith /* post receives: */ 3460452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 34778b31e54SBarry Smith CHKPTRQ(rvalues); 3480452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 34978b31e54SBarry Smith CHKPTRQ(recv_waits); 3501eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 351416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3521eb62cbbSBarry Smith } 3531eb62cbbSBarry Smith 3541eb62cbbSBarry Smith /* do sends: 3551eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3561eb62cbbSBarry Smith the ith processor 3571eb62cbbSBarry Smith */ 3580452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3590452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36078b31e54SBarry Smith CHKPTRQ(send_waits); 3610452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3621eb62cbbSBarry Smith starts[0] = 0; 36317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3641eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3651eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3661eb62cbbSBarry Smith } 3671eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3681eb62cbbSBarry Smith 3691eb62cbbSBarry Smith starts[0] = 0; 37017699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3711eb62cbbSBarry Smith count = 0; 37217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3731eb62cbbSBarry Smith if (procs[i]) { 374416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3751eb62cbbSBarry Smith } 3761eb62cbbSBarry Smith } 3770452661fSBarry Smith PetscFree(starts); 3781eb62cbbSBarry Smith 37917699dbbSLois Curfman McInnes base = owners[rank]; 3801eb62cbbSBarry Smith 3811eb62cbbSBarry Smith /* wait on receives */ 3820452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3831eb62cbbSBarry Smith source = lens + nrecvs; 3841eb62cbbSBarry Smith count = nrecvs; slen = 0; 3851eb62cbbSBarry Smith while (count) { 386d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3871eb62cbbSBarry Smith /* unpack receives into our local space */ 3881eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 389d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 390d6dfbf8fSBarry Smith lens[imdex] = n; 3911eb62cbbSBarry Smith slen += n; 3921eb62cbbSBarry Smith count--; 3931eb62cbbSBarry Smith } 3940452661fSBarry Smith PetscFree(recv_waits); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* move the data into the send scatter */ 3970452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3981eb62cbbSBarry Smith count = 0; 3991eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4001eb62cbbSBarry Smith values = rvalues + i*nmax; 4011eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4021eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4031eb62cbbSBarry Smith } 4041eb62cbbSBarry Smith } 4050452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4060452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4071eb62cbbSBarry Smith 4081eb62cbbSBarry Smith /* actually zap the local rows */ 409416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 410464493b3SBarry Smith PLogObjectParent(A,istmp); 4110452661fSBarry Smith PetscFree(lrows); 41278b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 41378b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 41478b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4151eb62cbbSBarry Smith 4161eb62cbbSBarry Smith /* wait on sends */ 4171eb62cbbSBarry Smith if (nsends) { 4180452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 41978b31e54SBarry Smith CHKPTRQ(send_status); 4201eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4210452661fSBarry Smith PetscFree(send_status); 4221eb62cbbSBarry Smith } 4230452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4241eb62cbbSBarry Smith 4251eb62cbbSBarry Smith return 0; 4261eb62cbbSBarry Smith } 4271eb62cbbSBarry Smith 428416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4291eb62cbbSBarry Smith { 430416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4311eb62cbbSBarry Smith int ierr; 432416022c9SBarry Smith 43348d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 43464119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 435416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 43664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 437416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4381eb62cbbSBarry Smith return 0; 4391eb62cbbSBarry Smith } 4401eb62cbbSBarry Smith 441416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 442da3a660dSBarry Smith { 443416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 444da3a660dSBarry Smith int ierr; 44548d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 44664119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 447416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 44864119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 449416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 450da3a660dSBarry Smith return 0; 451da3a660dSBarry Smith } 452da3a660dSBarry Smith 453416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 454da3a660dSBarry Smith { 455416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 456da3a660dSBarry Smith int ierr; 457da3a660dSBarry Smith 45848d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 459da3a660dSBarry Smith /* do nondiagonal part */ 460416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 461da3a660dSBarry Smith /* send it on its way */ 462416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46364119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 464da3a660dSBarry Smith /* do local part */ 465416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 466da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 467da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 468da3a660dSBarry Smith /* but is not perhaps always true. */ 469416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47064119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 471da3a660dSBarry Smith return 0; 472da3a660dSBarry Smith } 473da3a660dSBarry Smith 474416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 475da3a660dSBarry Smith { 476416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 477da3a660dSBarry Smith int ierr; 478da3a660dSBarry Smith 47948d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 480da3a660dSBarry Smith /* do nondiagonal part */ 481416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 482da3a660dSBarry Smith /* send it on its way */ 483416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 485da3a660dSBarry Smith /* do local part */ 486416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 487da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 488da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 489da3a660dSBarry Smith /* but is not perhaps always true. */ 490416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 492da3a660dSBarry Smith return 0; 493da3a660dSBarry Smith } 494da3a660dSBarry Smith 4951eb62cbbSBarry Smith /* 4961eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4971eb62cbbSBarry Smith diagonal block 4981eb62cbbSBarry Smith */ 499416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5001eb62cbbSBarry Smith { 501416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 50248d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 503416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5041eb62cbbSBarry Smith } 5051eb62cbbSBarry Smith 50644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5071eb62cbbSBarry Smith { 5081eb62cbbSBarry Smith Mat mat = (Mat) obj; 50944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5101eb62cbbSBarry Smith int ierr; 511a5a9c739SBarry Smith #if defined(PETSC_LOG) 5126e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 513a5a9c739SBarry Smith #endif 5140452661fSBarry Smith PetscFree(aij->rowners); 51578b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 51678b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5170452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5180452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5191eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 520a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5210452661fSBarry Smith PetscFree(aij); 522a5a9c739SBarry Smith PLogObjectDestroy(mat); 5230452661fSBarry Smith PetscHeaderDestroy(mat); 5241eb62cbbSBarry Smith return 0; 5251eb62cbbSBarry Smith } 5267857610eSBarry Smith #include "draw.h" 527b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 528ee50ffe9SBarry Smith 529416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5301eb62cbbSBarry Smith { 531416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 532416022c9SBarry Smith int ierr; 533416022c9SBarry Smith 53417699dbbSLois Curfman McInnes if (aij->size == 1) { 535416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 536416022c9SBarry Smith } 537a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 538416022c9SBarry Smith return 0; 539416022c9SBarry Smith } 540416022c9SBarry Smith 541d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 542416022c9SBarry Smith { 54344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 545a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 546d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 547d636dbe3SBarry Smith FILE *fd; 548416022c9SBarry Smith 549416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 550416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 55108480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 552a56f8943SBarry Smith int nz,nzalloc,mem; 553a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 554a56f8943SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 555a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 556a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 557a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 558a56f8943SBarry Smith nzalloc,mem); 55908480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 56008480c60SBarry Smith fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 56108480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 56208480c60SBarry Smith fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 563a56f8943SBarry Smith fflush(fd); 564a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 565a56f8943SBarry Smith ierr = VecScatterView(aij->Mvctx,viewer); 566416022c9SBarry Smith return 0; 567416022c9SBarry Smith } 56808480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 56908480c60SBarry Smith return 0; 57008480c60SBarry Smith } 571416022c9SBarry Smith } 572416022c9SBarry Smith 573416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 574d636dbe3SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 5757f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 576d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 57717699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5781eb62cbbSBarry Smith aij->cend); 57978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 58078b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 581d13ab20cSBarry Smith fflush(fd); 5827f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 583d13ab20cSBarry Smith } 584416022c9SBarry Smith else { 585a56f8943SBarry Smith int size = aij->size; 586a56f8943SBarry Smith rank = aij->rank; 58717699dbbSLois Curfman McInnes if (size == 1) { 58878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 58995373324SBarry Smith } 59095373324SBarry Smith else { 59195373324SBarry Smith /* assemble the entire matrix onto first processor. */ 59295373324SBarry Smith Mat A; 593ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 5942eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 59595373324SBarry Smith Scalar *a; 5962ee70a88SLois Curfman McInnes 59717699dbbSLois Curfman McInnes if (!rank) { 598*b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 599c451ab25SLois Curfman McInnes CHKERRQ(ierr); 60095373324SBarry Smith } 60195373324SBarry Smith else { 602*b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 603c451ab25SLois Curfman McInnes CHKERRQ(ierr); 60495373324SBarry Smith } 605464493b3SBarry Smith PLogObjectParent(mat,A); 606416022c9SBarry Smith 60795373324SBarry Smith /* copy over the A part */ 608ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6092ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 61095373324SBarry Smith row = aij->rstart; 611dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 61295373324SBarry Smith for ( i=0; i<m; i++ ) { 613416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 61495373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 61595373324SBarry Smith } 6162ee70a88SLois Curfman McInnes aj = Aloc->j; 617dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 61895373324SBarry Smith 61995373324SBarry Smith /* copy over the B part */ 620ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6212ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62295373324SBarry Smith row = aij->rstart; 6230452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 624dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 62595373324SBarry Smith for ( i=0; i<m; i++ ) { 626416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 62795373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 62895373324SBarry Smith } 6290452661fSBarry Smith PetscFree(ct); 63078b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 63178b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 63217699dbbSLois Curfman McInnes if (!rank) { 63378b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 63495373324SBarry Smith } 63578b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 63695373324SBarry Smith } 63795373324SBarry Smith } 6381eb62cbbSBarry Smith return 0; 6391eb62cbbSBarry Smith } 6401eb62cbbSBarry Smith 641416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 642416022c9SBarry Smith { 643416022c9SBarry Smith Mat mat = (Mat) obj; 644416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 645416022c9SBarry Smith int ierr; 646416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 647416022c9SBarry Smith 64848d91487SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 649416022c9SBarry Smith if (!viewer) { 650416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 651416022c9SBarry Smith } 652416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 653416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 654d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 655416022c9SBarry Smith } 656416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 657d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 658d7e8b826SBarry Smith } 659d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 660d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 661416022c9SBarry Smith } 662416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 663d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 664416022c9SBarry Smith } 665416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 666416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 667416022c9SBarry Smith } 668416022c9SBarry Smith return 0; 669416022c9SBarry Smith } 670416022c9SBarry Smith 671ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6721eb62cbbSBarry Smith /* 6731eb62cbbSBarry Smith This has to provide several versions. 6741eb62cbbSBarry Smith 6751eb62cbbSBarry Smith 1) per sequential 6761eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6771eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 678d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6791eb62cbbSBarry Smith */ 680fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 681dbb450caSBarry Smith double fshift,int its,Vec xx) 6828a729477SBarry Smith { 68344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 684d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 685ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6866abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6876abc6512SBarry Smith int ierr,*idx, *diag; 688416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 689da3a660dSBarry Smith Vec tt; 6908a729477SBarry Smith 69148d91487SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 6921eb62cbbSBarry Smith 693d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 694dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 695dbb450caSBarry Smith ls = ls + shift; 696ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 697d6dfbf8fSBarry Smith diag = A->diag; 698acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 69948d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 700acb40c82SBarry Smith } 701da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 702da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 703da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 704da3a660dSBarry Smith 705da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 706da3a660dSBarry Smith 707da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 708da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 709da3a660dSBarry Smith is the relaxation factor. 710da3a660dSBarry Smith */ 71178b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 712464493b3SBarry Smith PLogObjectParent(matin,tt); 713da3a660dSBarry Smith VecGetArray(tt,&t); 714da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 715da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 716da3a660dSBarry Smith VecSet(&zero,mat->lvec); 71764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 71878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 719da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 720da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 721dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 722dbb450caSBarry Smith v = A->a + diag[i] + !shift; 723da3a660dSBarry Smith sum = b[i]; 724da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 725dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 726da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 727dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 728dbb450caSBarry Smith v = B->a + B->i[i] + shift; 729da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 730da3a660dSBarry Smith x[i] = omega*(sum/d); 731da3a660dSBarry Smith } 73264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 73378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 734da3a660dSBarry Smith 735da3a660dSBarry Smith /* t = b - (2*E - D)x */ 736da3a660dSBarry Smith v = A->a; 737dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 738da3a660dSBarry Smith 739da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 740dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 741da3a660dSBarry Smith diag = A->diag; 742da3a660dSBarry Smith VecSet(&zero,mat->lvec); 74364119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 74478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 745da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 746da3a660dSBarry Smith n = diag[i] - A->i[i]; 747dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 748dbb450caSBarry Smith v = A->a + A->i[i] + shift; 749da3a660dSBarry Smith sum = t[i]; 750da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 751dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 752da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 753dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 754dbb450caSBarry Smith v = B->a + B->i[i] + shift; 755da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 756da3a660dSBarry Smith t[i] = omega*(sum/d); 757da3a660dSBarry Smith } 75864119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 760da3a660dSBarry Smith /* x = x + t */ 761da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 762da3a660dSBarry Smith VecDestroy(tt); 763da3a660dSBarry Smith return 0; 764da3a660dSBarry Smith } 765da3a660dSBarry Smith 7661eb62cbbSBarry Smith 767d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 768da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 769da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 770da3a660dSBarry Smith } 771da3a660dSBarry Smith else { 77264119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 77378b31e54SBarry Smith CHKERRQ(ierr); 77464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 77578b31e54SBarry Smith CHKERRQ(ierr); 776da3a660dSBarry Smith } 777d6dfbf8fSBarry Smith while (its--) { 778d6dfbf8fSBarry Smith /* go down through the rows */ 77964119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 78078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 781d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 782d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 783dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 784dbb450caSBarry Smith v = A->a + A->i[i] + shift; 785d6dfbf8fSBarry Smith sum = b[i]; 786d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 787dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 788d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 789dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 790dbb450caSBarry Smith v = B->a + B->i[i] + shift; 791d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 792d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 793d6dfbf8fSBarry Smith } 79464119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 79578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 796d6dfbf8fSBarry Smith /* come up through the rows */ 79764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 79878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 799d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 800d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 801dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 802dbb450caSBarry Smith v = A->a + A->i[i] + shift; 803d6dfbf8fSBarry Smith sum = b[i]; 804d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 805dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 806d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 807dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 808dbb450caSBarry Smith v = B->a + B->i[i] + shift; 809d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 810d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 811d6dfbf8fSBarry Smith } 81264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 81378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 814d6dfbf8fSBarry Smith } 815d6dfbf8fSBarry Smith } 816d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 817da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 818da3a660dSBarry Smith VecSet(&zero,mat->lvec); 81964119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 82078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 821da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 822da3a660dSBarry Smith n = diag[i] - A->i[i]; 823dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 824dbb450caSBarry Smith v = A->a + A->i[i] + shift; 825da3a660dSBarry Smith sum = b[i]; 826da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 827dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 828da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 829dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 830dbb450caSBarry Smith v = B->a + B->i[i] + shift; 831da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 832da3a660dSBarry Smith x[i] = omega*(sum/d); 833da3a660dSBarry Smith } 83464119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 83578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 836da3a660dSBarry Smith its--; 837da3a660dSBarry Smith } 838d6dfbf8fSBarry Smith while (its--) { 83964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84078b31e54SBarry Smith CHKERRQ(ierr); 84164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84278b31e54SBarry Smith CHKERRQ(ierr); 84364119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 845d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 846d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 847dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 848dbb450caSBarry Smith v = A->a + A->i[i] + shift; 849d6dfbf8fSBarry Smith sum = b[i]; 850d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 851dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 852d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 853dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 854dbb450caSBarry Smith v = B->a + B->i[i] + shift; 855d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 856d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 857d6dfbf8fSBarry Smith } 85864119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 860d6dfbf8fSBarry Smith } 861d6dfbf8fSBarry Smith } 862d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 863da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 864da3a660dSBarry Smith VecSet(&zero,mat->lvec); 86564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 86678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 867da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 868da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 869dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 870dbb450caSBarry Smith v = A->a + diag[i] + !shift; 871da3a660dSBarry Smith sum = b[i]; 872da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 873dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 874da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 875dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 876dbb450caSBarry Smith v = B->a + B->i[i] + shift; 877da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 878da3a660dSBarry Smith x[i] = omega*(sum/d); 879da3a660dSBarry Smith } 88064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 882da3a660dSBarry Smith its--; 883da3a660dSBarry Smith } 884d6dfbf8fSBarry Smith while (its--) { 88564119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 88678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 88764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 88878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 88964119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 89078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 891d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 892d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 893dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 894dbb450caSBarry Smith v = A->a + A->i[i] + shift; 895d6dfbf8fSBarry Smith sum = b[i]; 896d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 897dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 898d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 899dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 900dbb450caSBarry Smith v = B->a + B->i[i] + shift; 901d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 902d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 903d6dfbf8fSBarry Smith } 90464119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 90578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 906d6dfbf8fSBarry Smith } 907d6dfbf8fSBarry Smith } 908d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 909da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 910dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 911da3a660dSBarry Smith } 91264119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 91378b31e54SBarry Smith CHKERRQ(ierr); 91464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 91578b31e54SBarry Smith CHKERRQ(ierr); 916d6dfbf8fSBarry Smith while (its--) { 917d6dfbf8fSBarry Smith /* go down through the rows */ 918d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 919d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 920dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 921dbb450caSBarry Smith v = A->a + A->i[i] + shift; 922d6dfbf8fSBarry Smith sum = b[i]; 923d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 924dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 925d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 926dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 927dbb450caSBarry Smith v = B->a + B->i[i] + shift; 928d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 929d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 930d6dfbf8fSBarry Smith } 931d6dfbf8fSBarry Smith /* come up through the rows */ 932d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 933d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 934dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 935dbb450caSBarry Smith v = A->a + A->i[i] + shift; 936d6dfbf8fSBarry Smith sum = b[i]; 937d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 938dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 939d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 940dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 941dbb450caSBarry Smith v = B->a + B->i[i] + shift; 942d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 943d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 944d6dfbf8fSBarry Smith } 945d6dfbf8fSBarry Smith } 946d6dfbf8fSBarry Smith } 947d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 948da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 949dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 950da3a660dSBarry Smith } 95164119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 95278b31e54SBarry Smith CHKERRQ(ierr); 95364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 95478b31e54SBarry Smith CHKERRQ(ierr); 955d6dfbf8fSBarry Smith while (its--) { 956d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 957d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 958dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 959dbb450caSBarry Smith v = A->a + A->i[i] + shift; 960d6dfbf8fSBarry Smith sum = b[i]; 961d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 962dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 963d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 964dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 965dbb450caSBarry Smith v = B->a + B->i[i] + shift; 966d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 967d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 968d6dfbf8fSBarry Smith } 969d6dfbf8fSBarry Smith } 970d6dfbf8fSBarry Smith } 971d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 972da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 973dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 974da3a660dSBarry Smith } 97564119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 97678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 97764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 97878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 979d6dfbf8fSBarry Smith while (its--) { 980d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 981d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 982dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 983dbb450caSBarry Smith v = A->a + A->i[i] + shift; 984d6dfbf8fSBarry Smith sum = b[i]; 985d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 986dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 987d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 988dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 989dbb450caSBarry Smith v = B->a + B->i[i] + shift; 990d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 991d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 992d6dfbf8fSBarry Smith } 993d6dfbf8fSBarry Smith } 994d6dfbf8fSBarry Smith } 9958a729477SBarry Smith return 0; 9968a729477SBarry Smith } 997a66be287SLois Curfman McInnes 998fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 999fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1000a66be287SLois Curfman McInnes { 1001a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1002a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1003a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1004a66be287SLois Curfman McInnes 100578b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 100678b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1007a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1008a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1009a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1010a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1011d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1012a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1013a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1014d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1015a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1016a66be287SLois Curfman McInnes } 1017a66be287SLois Curfman McInnes return 0; 1018a66be287SLois Curfman McInnes } 1019a66be287SLois Curfman McInnes 1020299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1021299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1022299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1023299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1024299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1025299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1026299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1027299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1028299609e3SLois Curfman McInnes 1029416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1030c74985f6SBarry Smith { 1031c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1032c74985f6SBarry Smith 1033c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1034c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1035c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1036c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1037c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1038c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1039c74985f6SBarry Smith } 1040c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1041c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1042c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1043c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1044c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1045bbb6d6a8SBarry Smith else if (op == COLUMN_ORIENTED) 10464d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 1047c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10484d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1049c0bbcb79SLois Curfman McInnes else 10504d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1051c74985f6SBarry Smith return 0; 1052c74985f6SBarry Smith } 1053c74985f6SBarry Smith 1054d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1055c74985f6SBarry Smith { 105644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1057c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1058c74985f6SBarry Smith return 0; 1059c74985f6SBarry Smith } 1060c74985f6SBarry Smith 1061d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1062c74985f6SBarry Smith { 106344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1064b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1065c74985f6SBarry Smith return 0; 1066c74985f6SBarry Smith } 1067c74985f6SBarry Smith 1068d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1069c74985f6SBarry Smith { 107044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1071c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1072c74985f6SBarry Smith return 0; 1073c74985f6SBarry Smith } 1074c74985f6SBarry Smith 107539e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 107639e00950SLois Curfman McInnes { 1077154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1078154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1079154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1080154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 108139e00950SLois Curfman McInnes 1082b49de8d1SLois Curfman McInnes if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix"); 1083b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1084abc0e9e4SLois Curfman McInnes lrow = row - rstart; 108539e00950SLois Curfman McInnes 1086154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1087154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1088154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 108978b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 109078b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1091154123eaSLois Curfman McInnes nztot = nzA + nzB; 1092154123eaSLois Curfman McInnes 1093154123eaSLois Curfman McInnes if (v || idx) { 1094154123eaSLois Curfman McInnes if (nztot) { 1095154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1096299609e3SLois Curfman McInnes int imark; 109748b35521SBarry Smith if (mat->assembled) { 1098154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 109948b35521SBarry Smith } 1100154123eaSLois Curfman McInnes if (v) { 11010452661fSBarry Smith *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 110239e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1103154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1104154123eaSLois Curfman McInnes else break; 1105154123eaSLois Curfman McInnes } 1106154123eaSLois Curfman McInnes imark = i; 1107154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1108299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1109154123eaSLois Curfman McInnes } 1110154123eaSLois Curfman McInnes if (idx) { 11110452661fSBarry Smith *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1112154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1113154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1114154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1115154123eaSLois Curfman McInnes else break; 1116154123eaSLois Curfman McInnes } 1117154123eaSLois Curfman McInnes imark = i; 1118154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1119299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 112039e00950SLois Curfman McInnes } 112139e00950SLois Curfman McInnes } 112239e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1123154123eaSLois Curfman McInnes } 112439e00950SLois Curfman McInnes *nz = nztot; 112578b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 112678b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 112739e00950SLois Curfman McInnes return 0; 112839e00950SLois Curfman McInnes } 112939e00950SLois Curfman McInnes 113039e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 113139e00950SLois Curfman McInnes { 11320452661fSBarry Smith if (idx) PetscFree(*idx); 11330452661fSBarry Smith if (v) PetscFree(*v); 113439e00950SLois Curfman McInnes return 0; 113539e00950SLois Curfman McInnes } 113639e00950SLois Curfman McInnes 1137cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1138855ac2c5SLois Curfman McInnes { 1139855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1140ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1141416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1142416022c9SBarry Smith double sum = 0.0; 114304ca555eSLois Curfman McInnes Scalar *v; 114404ca555eSLois Curfman McInnes 1145416022c9SBarry Smith if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 114617699dbbSLois Curfman McInnes if (aij->size == 1) { 114714183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 114837fa93a5SLois Curfman McInnes } else { 114904ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 115004ca555eSLois Curfman McInnes v = amat->a; 115104ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 115204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 115304ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 115404ca555eSLois Curfman McInnes #else 115504ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 115604ca555eSLois Curfman McInnes #endif 115704ca555eSLois Curfman McInnes } 115804ca555eSLois Curfman McInnes v = bmat->a; 115904ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 116004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116104ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116204ca555eSLois Curfman McInnes #else 116304ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 116404ca555eSLois Curfman McInnes #endif 116504ca555eSLois Curfman McInnes } 116604ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 116704ca555eSLois Curfman McInnes *norm = sqrt(*norm); 116804ca555eSLois Curfman McInnes } 116904ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 117004ca555eSLois Curfman McInnes double *tmp, *tmp2; 117104ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11720452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11730452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1174cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 117504ca555eSLois Curfman McInnes *norm = 0.0; 117604ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 117704ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1178579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 117904ca555eSLois Curfman McInnes } 118004ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 118104ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1182579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 118304ca555eSLois Curfman McInnes } 118404ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 118504ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 118604ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 118704ca555eSLois Curfman McInnes } 11880452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 118904ca555eSLois Curfman McInnes } 119004ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1191515d9167SLois Curfman McInnes double ntemp = 0.0; 119204ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1193dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 119404ca555eSLois Curfman McInnes sum = 0.0; 119504ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1196cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 119704ca555eSLois Curfman McInnes } 1198dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 119904ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1200cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120104ca555eSLois Curfman McInnes } 1202515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 120304ca555eSLois Curfman McInnes } 1204515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 120504ca555eSLois Curfman McInnes } 120604ca555eSLois Curfman McInnes else { 1207515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 120804ca555eSLois Curfman McInnes } 120937fa93a5SLois Curfman McInnes } 1210855ac2c5SLois Curfman McInnes return 0; 1211855ac2c5SLois Curfman McInnes } 1212855ac2c5SLois Curfman McInnes 12130de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1214b7c46309SBarry Smith { 1215b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1216dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1217416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1218416022c9SBarry Smith Mat B; 1219b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1220b7c46309SBarry Smith Scalar *array; 1221b7c46309SBarry Smith 1222416022c9SBarry Smith if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1223*b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1224*b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1225b7c46309SBarry Smith 1226b7c46309SBarry Smith /* copy over the A part */ 1227ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1228b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1229b7c46309SBarry Smith row = a->rstart; 1230dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1231b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1232416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1233b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1234b7c46309SBarry Smith } 1235b7c46309SBarry Smith aj = Aloc->j; 1236dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1237b7c46309SBarry Smith 1238b7c46309SBarry Smith /* copy over the B part */ 1239ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1240b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1241b7c46309SBarry Smith row = a->rstart; 12420452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1243dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1244b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1245416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1246b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1247b7c46309SBarry Smith } 12480452661fSBarry Smith PetscFree(ct); 1249b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1250b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12510de55854SLois Curfman McInnes if (matout) { 12520de55854SLois Curfman McInnes *matout = B; 12530de55854SLois Curfman McInnes } else { 12540de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12550452661fSBarry Smith PetscFree(a->rowners); 12560de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12570de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12580452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12590452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12600de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1261a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12620452661fSBarry Smith PetscFree(a); 1263416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12640452661fSBarry Smith PetscHeaderDestroy(B); 12650de55854SLois Curfman McInnes } 1266b7c46309SBarry Smith return 0; 1267b7c46309SBarry Smith } 1268b7c46309SBarry Smith 1269fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1270a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int); 1271d6dfbf8fSBarry Smith 12728a729477SBarry Smith /* -------------------------------------------------------------------*/ 12732ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 127439e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 127544a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 127644a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1277299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1278299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1279299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 128044a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1281b7c46309SBarry Smith MatTranspose_MPIAIJ, 1282a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1283855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1284ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12851eb62cbbSBarry Smith 0, 1286299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1287299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1288299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1289d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1290299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1291b49de8d1SLois Curfman McInnes 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ,0,0, 1292b49de8d1SLois Curfman McInnes 0,0,0, 1293b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIAIJ}; 12948a729477SBarry Smith 12951987afe7SBarry Smith /*@C 1296ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1297ff756334SLois Curfman McInnes (the default parallel PETSc format). 12988a729477SBarry Smith 12998a729477SBarry Smith Input Parameters: 13001eb62cbbSBarry Smith . comm - MPI communicator 13017d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13027d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13037d3e4905SLois Curfman McInnes if N is given) 13047d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13057d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13067d3e4905SLois Curfman McInnes if n is given) 1307ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1308ff756334SLois Curfman McInnes (same for all local rows) 1309ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1310ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1311ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1312ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1313ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1314ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1315ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13168a729477SBarry Smith 1317ff756334SLois Curfman McInnes Output Parameter: 13188a729477SBarry Smith . newmat - the matrix 13198a729477SBarry Smith 1320ff756334SLois Curfman McInnes Notes: 1321ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1322ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13230002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13240002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1325ff756334SLois Curfman McInnes 1326ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1327ff756334SLois Curfman McInnes (possibly both). 1328ff756334SLois Curfman McInnes 1329e0245417SLois Curfman McInnes Storage Information: 1330e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1331e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1332e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1333e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1334e0245417SLois Curfman McInnes 1335e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1336c451ab25SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set d_nz=0 1337*b4fd4287SBarry Smith and d_nnz=PETSC_NULL for PETSc to control dynamic memory allocation. 1338e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1339e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1340ff756334SLois Curfman McInnes 1341c451ab25SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 1342c451ab25SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 1343c451ab25SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 1344c451ab25SLois Curfman McInnes 1345c451ab25SLois Curfman McInnes Options Database Keys: 1346c451ab25SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 1347c451ab25SLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1348c451ab25SLois Curfman McInnes 1349dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1350ff756334SLois Curfman McInnes 1351fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13528a729477SBarry Smith @*/ 13531eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13541eb62cbbSBarry Smith int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 13558a729477SBarry Smith { 13568a729477SBarry Smith Mat mat; 1357416022c9SBarry Smith Mat_MPIAIJ *a; 13586abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1359416022c9SBarry Smith 13608a729477SBarry Smith *newmat = 0; 13610452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1362a5a9c739SBarry Smith PLogObjectCreate(mat); 13630452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1364416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 136544a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 136644a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 13678a729477SBarry Smith mat->factor = 0; 1368d6dfbf8fSBarry Smith 136964119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 137017699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 137117699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 13721eb62cbbSBarry Smith 1373*b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 13741987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 13751987afe7SBarry Smith 1376dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13771eb62cbbSBarry Smith work[0] = m; work[1] = n; 1378d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1379dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1380dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13811eb62cbbSBarry Smith } 138217699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 138317699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1384416022c9SBarry Smith a->m = m; 1385416022c9SBarry Smith a->n = n; 1386416022c9SBarry Smith a->N = N; 1387416022c9SBarry Smith a->M = M; 13881eb62cbbSBarry Smith 13891eb62cbbSBarry Smith /* build local table of row and column ownerships */ 13900452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1391579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 139217699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1393416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1394416022c9SBarry Smith a->rowners[0] = 0; 139517699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1396416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 13978a729477SBarry Smith } 139817699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 139917699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1400416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1401416022c9SBarry Smith a->cowners[0] = 0; 140217699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1403416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 14048a729477SBarry Smith } 140517699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 140617699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 14078a729477SBarry Smith 1408416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1409416022c9SBarry Smith PLogObjectParent(mat,a->A); 1410416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1411416022c9SBarry Smith PLogObjectParent(mat,a->B); 14128a729477SBarry Smith 14131eb62cbbSBarry Smith /* build cache for off array entries formed */ 1414416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1415416022c9SBarry Smith a->colmap = 0; 1416416022c9SBarry Smith a->garray = 0; 14178a729477SBarry Smith 14181eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1419416022c9SBarry Smith a->lvec = 0; 1420416022c9SBarry Smith a->Mvctx = 0; 1421416022c9SBarry Smith a->assembled = 0; 14228a729477SBarry Smith 1423d6dfbf8fSBarry Smith *newmat = mat; 1424d6dfbf8fSBarry Smith return 0; 1425d6dfbf8fSBarry Smith } 1426c74985f6SBarry Smith 1427a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1428d6dfbf8fSBarry Smith { 1429d6dfbf8fSBarry Smith Mat mat; 1430416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 14312ee70a88SLois Curfman McInnes int ierr, len; 1432d6dfbf8fSBarry Smith 1433416022c9SBarry Smith if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1434416022c9SBarry Smith *newmat = 0; 14350452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1436a5a9c739SBarry Smith PLogObjectCreate(mat); 14370452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1438416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 143944a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 144044a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1441d6dfbf8fSBarry Smith mat->factor = matin->factor; 1442d6dfbf8fSBarry Smith 1443416022c9SBarry Smith a->m = oldmat->m; 1444416022c9SBarry Smith a->n = oldmat->n; 1445416022c9SBarry Smith a->M = oldmat->M; 1446416022c9SBarry Smith a->N = oldmat->N; 1447d6dfbf8fSBarry Smith 1448416022c9SBarry Smith a->assembled = 1; 1449416022c9SBarry Smith a->rstart = oldmat->rstart; 1450416022c9SBarry Smith a->rend = oldmat->rend; 1451416022c9SBarry Smith a->cstart = oldmat->cstart; 1452416022c9SBarry Smith a->cend = oldmat->cend; 145317699dbbSLois Curfman McInnes a->size = oldmat->size; 145417699dbbSLois Curfman McInnes a->rank = oldmat->rank; 145564119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1456d6dfbf8fSBarry Smith 14570452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 145817699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 145917699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1460416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14612ee70a88SLois Curfman McInnes if (oldmat->colmap) { 14620452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1463416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1464416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1465416022c9SBarry Smith } else a->colmap = 0; 1466ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 14670452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1468464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1469416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1470416022c9SBarry Smith } else a->garray = 0; 1471d6dfbf8fSBarry Smith 1472416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1473416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1474a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1475416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1476416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1477416022c9SBarry Smith PLogObjectParent(mat,a->A); 1478416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1479416022c9SBarry Smith PLogObjectParent(mat,a->B); 14808a729477SBarry Smith *newmat = mat; 14818a729477SBarry Smith return 0; 14828a729477SBarry Smith } 1483416022c9SBarry Smith 1484416022c9SBarry Smith #include "sysio.h" 1485416022c9SBarry Smith 148602834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1487416022c9SBarry Smith { 1488d65a2f8fSBarry Smith Mat A; 1489416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1490d65a2f8fSBarry Smith Scalar *vals,*svals; 1491416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1492416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1493416022c9SBarry Smith MPI_Status status; 149417699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1495d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1496d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1497416022c9SBarry Smith 149817699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 149917699dbbSLois Curfman McInnes if (!rank) { 1500416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1501416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 150202834360SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1503416022c9SBarry Smith } 1504416022c9SBarry Smith 1505416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1506416022c9SBarry Smith M = header[1]; N = header[2]; 1507416022c9SBarry Smith /* determine ownership of all rows */ 150817699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15090452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1510416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1511416022c9SBarry Smith rowners[0] = 0; 151217699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1513416022c9SBarry Smith rowners[i] += rowners[i-1]; 1514416022c9SBarry Smith } 151517699dbbSLois Curfman McInnes rstart = rowners[rank]; 151617699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1517416022c9SBarry Smith 1518416022c9SBarry Smith /* distribute row lengths to all processors */ 15190452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1520416022c9SBarry Smith offlens = ourlens + (rend-rstart); 152117699dbbSLois Curfman McInnes if (!rank) { 15220452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1523416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 15240452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 152517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1526d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15270452661fSBarry Smith PetscFree(sndcounts); 1528416022c9SBarry Smith } 1529416022c9SBarry Smith else { 1530416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1531416022c9SBarry Smith } 1532416022c9SBarry Smith 153317699dbbSLois Curfman McInnes if (!rank) { 1534416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15350452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1536cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 153717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1538416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1539416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1540416022c9SBarry Smith } 1541416022c9SBarry Smith } 15420452661fSBarry Smith PetscFree(rowlengths); 1543416022c9SBarry Smith 1544416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1545416022c9SBarry Smith maxnz = 0; 154617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15470452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1548416022c9SBarry Smith } 15490452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1550416022c9SBarry Smith 1551416022c9SBarry Smith /* read in my part of the matrix column indices */ 1552416022c9SBarry Smith nz = procsnz[0]; 15530452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1554d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1555d65a2f8fSBarry Smith 1556d65a2f8fSBarry Smith /* read in every one elses and ship off */ 155717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1558d65a2f8fSBarry Smith nz = procsnz[i]; 1559416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1560d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1561d65a2f8fSBarry Smith } 15620452661fSBarry Smith PetscFree(cols); 1563416022c9SBarry Smith } 1564416022c9SBarry Smith else { 1565416022c9SBarry Smith /* determine buffer space needed for message */ 1566416022c9SBarry Smith nz = 0; 1567416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1568416022c9SBarry Smith nz += ourlens[i]; 1569416022c9SBarry Smith } 15700452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1571416022c9SBarry Smith 1572416022c9SBarry Smith /* receive message of column indices*/ 1573d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1574416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 157502834360SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1576416022c9SBarry Smith } 1577416022c9SBarry Smith 1578416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1579cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1580416022c9SBarry Smith jj = 0; 1581416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1582416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1583d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1584416022c9SBarry Smith jj++; 1585416022c9SBarry Smith } 1586416022c9SBarry Smith } 1587d65a2f8fSBarry Smith 1588d65a2f8fSBarry Smith /* create our matrix */ 1589416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1590416022c9SBarry Smith ourlens[i] -= offlens[i]; 1591416022c9SBarry Smith } 1592d65a2f8fSBarry Smith if (type == MATMPIAIJ) { 1593d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1594d65a2f8fSBarry Smith } 1595d65a2f8fSBarry Smith else if (type == MATMPIROW) { 1596d65a2f8fSBarry Smith ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1597d65a2f8fSBarry Smith } 1598d65a2f8fSBarry Smith A = *newmat; 1599d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1600d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1601d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1602d65a2f8fSBarry Smith } 1603416022c9SBarry Smith 160417699dbbSLois Curfman McInnes if (!rank) { 16050452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1606416022c9SBarry Smith 1607416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1608416022c9SBarry Smith nz = procsnz[0]; 1609416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1610d65a2f8fSBarry Smith 1611d65a2f8fSBarry Smith /* insert into matrix */ 1612d65a2f8fSBarry Smith jj = rstart; 1613d65a2f8fSBarry Smith smycols = mycols; 1614d65a2f8fSBarry Smith svals = vals; 1615d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1616d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1617d65a2f8fSBarry Smith smycols += ourlens[i]; 1618d65a2f8fSBarry Smith svals += ourlens[i]; 1619d65a2f8fSBarry Smith jj++; 1620416022c9SBarry Smith } 1621416022c9SBarry Smith 1622d65a2f8fSBarry Smith /* read in other processors and ship out */ 162317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1624416022c9SBarry Smith nz = procsnz[i]; 1625416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1626d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1627416022c9SBarry Smith } 16280452661fSBarry Smith PetscFree(procsnz); 1629416022c9SBarry Smith } 1630d65a2f8fSBarry Smith else { 1631d65a2f8fSBarry Smith /* receive numeric values */ 16320452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1633416022c9SBarry Smith 1634d65a2f8fSBarry Smith /* receive message of values*/ 1635d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1636d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 163702834360SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1638d65a2f8fSBarry Smith 1639d65a2f8fSBarry Smith /* insert into matrix */ 1640d65a2f8fSBarry Smith jj = rstart; 1641d65a2f8fSBarry Smith smycols = mycols; 1642d65a2f8fSBarry Smith svals = vals; 1643d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1644d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1645d65a2f8fSBarry Smith smycols += ourlens[i]; 1646d65a2f8fSBarry Smith svals += ourlens[i]; 1647d65a2f8fSBarry Smith jj++; 1648d65a2f8fSBarry Smith } 1649d65a2f8fSBarry Smith } 16500452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1651d65a2f8fSBarry Smith 1652d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1653d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1654416022c9SBarry Smith return 0; 1655416022c9SBarry Smith } 1656