1cb512458SBarry Smith #ifndef lint 2*b49de8d1SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.99 1995/11/09 22:28:54 bsmith Exp curfman $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 392ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 401eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 43dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46dbb450caSBarry Smith int shift = C->indexshift; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 53bbb6d6a8SBarry Smith if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54bbb6d6a8SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 551eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 561eb62cbbSBarry Smith row = idxm[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 58bbb6d6a8SBarry Smith if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59bbb6d6a8SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 601eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 611eb62cbbSBarry Smith col = idxn[j] - cstart; 6278b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 631eb62cbbSBarry Smith } 641eb62cbbSBarry Smith else { 65d6dfbf8fSBarry Smith if (aij->assembled) { 66b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 67dbb450caSBarry Smith col = aij->colmap[idxn[j]] + shift; 68ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 692493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 702493cbb0SBarry Smith col = idxn[j]; 71d6dfbf8fSBarry Smith } 729e25ed09SBarry Smith } 739e25ed09SBarry Smith else col = idxn[j]; 7478b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 751eb62cbbSBarry Smith } 761eb62cbbSBarry Smith } 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith else { 79416022c9SBarry Smith ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 801eb62cbbSBarry Smith } 818a729477SBarry Smith } 828a729477SBarry Smith return 0; 838a729477SBarry Smith } 848a729477SBarry Smith 85*b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 86*b49de8d1SLois Curfman McInnes { 87*b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 88*b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 89*b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 90*b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 91*b49de8d1SLois Curfman McInnes int shift = C->indexshift; 92*b49de8d1SLois Curfman McInnes 93*b49de8d1SLois Curfman McInnes if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix"); 94*b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 95*b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 96*b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 97*b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 98*b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 99*b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 100*b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 101*b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 102*b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 103*b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 104*b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 105*b49de8d1SLois Curfman McInnes } 106*b49de8d1SLois Curfman McInnes else { 107*b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 108*b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 109*b49de8d1SLois Curfman McInnes } 110*b49de8d1SLois Curfman McInnes } 111*b49de8d1SLois Curfman McInnes } 112*b49de8d1SLois Curfman McInnes else { 113*b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 114*b49de8d1SLois Curfman McInnes } 115*b49de8d1SLois Curfman McInnes } 116*b49de8d1SLois Curfman McInnes return 0; 117*b49de8d1SLois Curfman McInnes } 118*b49de8d1SLois Curfman McInnes 119fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1208a729477SBarry Smith { 12144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 122d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 12317699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 12417699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1251eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1266abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1271eb62cbbSBarry Smith InsertMode addv; 1281eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1291eb62cbbSBarry Smith 1301eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 131d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 132dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 133bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1341eb62cbbSBarry Smith } 1351eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1361eb62cbbSBarry Smith 1371eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1380452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 139cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1400452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1411eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1421eb62cbbSBarry Smith idx = aij->stash.idx[i]; 14317699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1441eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1451eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1468a729477SBarry Smith } 1478a729477SBarry Smith } 1488a729477SBarry Smith } 14917699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1501eb62cbbSBarry Smith 1511eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1520452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 15317699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 15417699dbbSLois Curfman McInnes nreceives = work[rank]; 15517699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 15617699dbbSLois Curfman McInnes nmax = work[rank]; 1570452661fSBarry Smith PetscFree(work); 1581eb62cbbSBarry Smith 1591eb62cbbSBarry Smith /* post receives: 1601eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1611eb62cbbSBarry Smith (global index,value) we store the global index as a double 162d6dfbf8fSBarry Smith to simplify the message passing. 1631eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1641eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1651eb62cbbSBarry Smith this is a lot of wasted space. 1661eb62cbbSBarry Smith 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith This could be done better. 1691eb62cbbSBarry Smith */ 1700452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 17178b31e54SBarry Smith CHKPTRQ(rvalues); 1720452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 17378b31e54SBarry Smith CHKPTRQ(recv_waits); 1741eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 175416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1761eb62cbbSBarry Smith comm,recv_waits+i); 1771eb62cbbSBarry Smith } 1781eb62cbbSBarry Smith 1791eb62cbbSBarry Smith /* do sends: 1801eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1811eb62cbbSBarry Smith the ith processor 1821eb62cbbSBarry Smith */ 1830452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1840452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 18578b31e54SBarry Smith CHKPTRQ(send_waits); 1860452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1871eb62cbbSBarry Smith starts[0] = 0; 18817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1891eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1901eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1911eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1921eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1931eb62cbbSBarry Smith } 1940452661fSBarry Smith PetscFree(owner); 1951eb62cbbSBarry Smith starts[0] = 0; 19617699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1971eb62cbbSBarry Smith count = 0; 19817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1991eb62cbbSBarry Smith if (procs[i]) { 200416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2011eb62cbbSBarry Smith comm,send_waits+count++); 2021eb62cbbSBarry Smith } 2031eb62cbbSBarry Smith } 2040452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2051eb62cbbSBarry Smith 2061eb62cbbSBarry Smith /* Free cache space */ 20778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2081eb62cbbSBarry Smith 2091eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2101eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2111eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2121eb62cbbSBarry Smith aij->rmax = nmax; 2131eb62cbbSBarry Smith 2141eb62cbbSBarry Smith return 0; 2151eb62cbbSBarry Smith } 21644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2171eb62cbbSBarry Smith 218fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2191eb62cbbSBarry Smith { 22044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 221dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2221eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 223416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 224416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2251eb62cbbSBarry Smith Scalar *values,val; 2261eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2271eb62cbbSBarry Smith 2281eb62cbbSBarry Smith /* wait on receives */ 2291eb62cbbSBarry Smith while (count) { 230d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2311eb62cbbSBarry Smith /* unpack receives into our local space */ 232d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 233c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2341eb62cbbSBarry Smith n = n/3; 2351eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2361eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2371eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2381eb62cbbSBarry Smith val = values[3*i+2]; 2391eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2401eb62cbbSBarry Smith col -= aij->cstart; 2411eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2421eb62cbbSBarry Smith } 2431eb62cbbSBarry Smith else { 244d6dfbf8fSBarry Smith if (aij->assembled) { 245b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 246dbb450caSBarry Smith col = aij->colmap[col] + shift; 247ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2482493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2492493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 250d6dfbf8fSBarry Smith } 2519e25ed09SBarry Smith } 2521eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2531eb62cbbSBarry Smith } 2541eb62cbbSBarry Smith } 2551eb62cbbSBarry Smith count--; 2561eb62cbbSBarry Smith } 2570452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2581eb62cbbSBarry Smith 2591eb62cbbSBarry Smith /* wait on sends */ 2601eb62cbbSBarry Smith if (aij->nsends) { 2610452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 26278b31e54SBarry Smith CHKPTRQ(send_status); 2631eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2640452661fSBarry Smith PetscFree(send_status); 2651eb62cbbSBarry Smith } 2660452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2671eb62cbbSBarry Smith 26864119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 26978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 27078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2711eb62cbbSBarry Smith 2722493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2732493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 274416022c9SBarry Smith MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 2752493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2762493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2772493cbb0SBarry Smith } 2782493cbb0SBarry Smith 2795e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 28078b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 281d6dfbf8fSBarry Smith aij->assembled = 1; 2825e42470aSBarry Smith } 28378b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 28478b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2855e42470aSBarry Smith 2868a729477SBarry Smith return 0; 2878a729477SBarry Smith } 2888a729477SBarry Smith 2892ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2901eb62cbbSBarry Smith { 29144a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 292dbd7a890SLois Curfman McInnes int ierr; 29378b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 29478b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2951eb62cbbSBarry Smith return 0; 2961eb62cbbSBarry Smith } 2971eb62cbbSBarry Smith 2981eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2991eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3001eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3011eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3021eb62cbbSBarry Smith routine. 3031eb62cbbSBarry Smith */ 30444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3051eb62cbbSBarry Smith { 30644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 30717699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3086abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 30917699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3105392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 311d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 312d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3131eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3141eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3151eb62cbbSBarry Smith IS istmp; 3161eb62cbbSBarry Smith 31748d91487SBarry Smith if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 31878b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 31978b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3201eb62cbbSBarry Smith 3211eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3220452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3240452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3251eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3261eb62cbbSBarry Smith idx = rows[i]; 3271eb62cbbSBarry Smith found = 0; 32817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3291eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3301eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3311eb62cbbSBarry Smith } 3321eb62cbbSBarry Smith } 333bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3341eb62cbbSBarry Smith } 33517699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3380452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 33917699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34017699dbbSLois Curfman McInnes nrecvs = work[rank]; 34117699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 34217699dbbSLois Curfman McInnes nmax = work[rank]; 3430452661fSBarry Smith PetscFree(work); 3441eb62cbbSBarry Smith 3451eb62cbbSBarry Smith /* post receives: */ 3460452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 34778b31e54SBarry Smith CHKPTRQ(rvalues); 3480452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 34978b31e54SBarry Smith CHKPTRQ(recv_waits); 3501eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 351416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3521eb62cbbSBarry Smith } 3531eb62cbbSBarry Smith 3541eb62cbbSBarry Smith /* do sends: 3551eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3561eb62cbbSBarry Smith the ith processor 3571eb62cbbSBarry Smith */ 3580452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3590452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36078b31e54SBarry Smith CHKPTRQ(send_waits); 3610452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3621eb62cbbSBarry Smith starts[0] = 0; 36317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3641eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3651eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3661eb62cbbSBarry Smith } 3671eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3681eb62cbbSBarry Smith 3691eb62cbbSBarry Smith starts[0] = 0; 37017699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3711eb62cbbSBarry Smith count = 0; 37217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3731eb62cbbSBarry Smith if (procs[i]) { 374416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3751eb62cbbSBarry Smith } 3761eb62cbbSBarry Smith } 3770452661fSBarry Smith PetscFree(starts); 3781eb62cbbSBarry Smith 37917699dbbSLois Curfman McInnes base = owners[rank]; 3801eb62cbbSBarry Smith 3811eb62cbbSBarry Smith /* wait on receives */ 3820452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3831eb62cbbSBarry Smith source = lens + nrecvs; 3841eb62cbbSBarry Smith count = nrecvs; slen = 0; 3851eb62cbbSBarry Smith while (count) { 386d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3871eb62cbbSBarry Smith /* unpack receives into our local space */ 3881eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 389d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 390d6dfbf8fSBarry Smith lens[imdex] = n; 3911eb62cbbSBarry Smith slen += n; 3921eb62cbbSBarry Smith count--; 3931eb62cbbSBarry Smith } 3940452661fSBarry Smith PetscFree(recv_waits); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* move the data into the send scatter */ 3970452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3981eb62cbbSBarry Smith count = 0; 3991eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4001eb62cbbSBarry Smith values = rvalues + i*nmax; 4011eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4021eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4031eb62cbbSBarry Smith } 4041eb62cbbSBarry Smith } 4050452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4060452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4071eb62cbbSBarry Smith 4081eb62cbbSBarry Smith /* actually zap the local rows */ 409416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 410464493b3SBarry Smith PLogObjectParent(A,istmp); 4110452661fSBarry Smith PetscFree(lrows); 41278b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 41378b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 41478b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4151eb62cbbSBarry Smith 4161eb62cbbSBarry Smith /* wait on sends */ 4171eb62cbbSBarry Smith if (nsends) { 4180452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 41978b31e54SBarry Smith CHKPTRQ(send_status); 4201eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4210452661fSBarry Smith PetscFree(send_status); 4221eb62cbbSBarry Smith } 4230452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4241eb62cbbSBarry Smith 4251eb62cbbSBarry Smith return 0; 4261eb62cbbSBarry Smith } 4271eb62cbbSBarry Smith 428416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4291eb62cbbSBarry Smith { 430416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4311eb62cbbSBarry Smith int ierr; 432416022c9SBarry Smith 43348d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 43464119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 435416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 43664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 437416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4381eb62cbbSBarry Smith return 0; 4391eb62cbbSBarry Smith } 4401eb62cbbSBarry Smith 441416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 442da3a660dSBarry Smith { 443416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 444da3a660dSBarry Smith int ierr; 44548d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 44664119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 447416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 44864119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 449416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 450da3a660dSBarry Smith return 0; 451da3a660dSBarry Smith } 452da3a660dSBarry Smith 453416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 454da3a660dSBarry Smith { 455416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 456da3a660dSBarry Smith int ierr; 457da3a660dSBarry Smith 45848d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 459da3a660dSBarry Smith /* do nondiagonal part */ 460416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 461da3a660dSBarry Smith /* send it on its way */ 462416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46364119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 464da3a660dSBarry Smith /* do local part */ 465416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 466da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 467da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 468da3a660dSBarry Smith /* but is not perhaps always true. */ 469416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47064119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 471da3a660dSBarry Smith return 0; 472da3a660dSBarry Smith } 473da3a660dSBarry Smith 474416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 475da3a660dSBarry Smith { 476416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 477da3a660dSBarry Smith int ierr; 478da3a660dSBarry Smith 47948d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 480da3a660dSBarry Smith /* do nondiagonal part */ 481416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 482da3a660dSBarry Smith /* send it on its way */ 483416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 485da3a660dSBarry Smith /* do local part */ 486416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 487da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 488da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 489da3a660dSBarry Smith /* but is not perhaps always true. */ 490416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 492da3a660dSBarry Smith return 0; 493da3a660dSBarry Smith } 494da3a660dSBarry Smith 4951eb62cbbSBarry Smith /* 4961eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4971eb62cbbSBarry Smith diagonal block 4981eb62cbbSBarry Smith */ 499416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5001eb62cbbSBarry Smith { 501416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 50248d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 503416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5041eb62cbbSBarry Smith } 5051eb62cbbSBarry Smith 50644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5071eb62cbbSBarry Smith { 5081eb62cbbSBarry Smith Mat mat = (Mat) obj; 50944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5101eb62cbbSBarry Smith int ierr; 511a5a9c739SBarry Smith #if defined(PETSC_LOG) 5126e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 513a5a9c739SBarry Smith #endif 5140452661fSBarry Smith PetscFree(aij->rowners); 51578b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 51678b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5170452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5180452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5191eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 520a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5210452661fSBarry Smith PetscFree(aij); 522a5a9c739SBarry Smith PLogObjectDestroy(mat); 5230452661fSBarry Smith PetscHeaderDestroy(mat); 5241eb62cbbSBarry Smith return 0; 5251eb62cbbSBarry Smith } 5267857610eSBarry Smith #include "draw.h" 527b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 528ee50ffe9SBarry Smith 529416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5301eb62cbbSBarry Smith { 531416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 532416022c9SBarry Smith int ierr; 533416022c9SBarry Smith 53417699dbbSLois Curfman McInnes if (aij->size == 1) { 535416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 536416022c9SBarry Smith } 537a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 538416022c9SBarry Smith return 0; 539416022c9SBarry Smith } 540416022c9SBarry Smith 541d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 542416022c9SBarry Smith { 54344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 545a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 546d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 547d636dbe3SBarry Smith FILE *fd; 548416022c9SBarry Smith 549416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 550416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 55108480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 552a56f8943SBarry Smith int nz,nzalloc,mem; 553a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 554a56f8943SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 555a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 556a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 557a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 558a56f8943SBarry Smith nzalloc,mem); 55908480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 56008480c60SBarry Smith fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 56108480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 56208480c60SBarry Smith fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 563a56f8943SBarry Smith fflush(fd); 564a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 565a56f8943SBarry Smith ierr = VecScatterView(aij->Mvctx,viewer); 566416022c9SBarry Smith return 0; 567416022c9SBarry Smith } 56808480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 56908480c60SBarry Smith return 0; 57008480c60SBarry Smith } 571416022c9SBarry Smith } 572416022c9SBarry Smith 573416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 574d636dbe3SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 5757f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 576d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 57717699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5781eb62cbbSBarry Smith aij->cend); 57978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 58078b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 581d13ab20cSBarry Smith fflush(fd); 5827f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 583d13ab20cSBarry Smith } 584416022c9SBarry Smith else { 585a56f8943SBarry Smith int size = aij->size; 586a56f8943SBarry Smith rank = aij->rank; 58717699dbbSLois Curfman McInnes if (size == 1) { 58878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 58995373324SBarry Smith } 59095373324SBarry Smith else { 59195373324SBarry Smith /* assemble the entire matrix onto first processor. */ 59295373324SBarry Smith Mat A; 593ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 5942eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 59595373324SBarry Smith Scalar *a; 5962ee70a88SLois Curfman McInnes 59717699dbbSLois Curfman McInnes if (!rank) { 598416022c9SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 59995373324SBarry Smith } 60095373324SBarry Smith else { 601416022c9SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 60295373324SBarry Smith } 603464493b3SBarry Smith PLogObjectParent(mat,A); 604416022c9SBarry Smith 60595373324SBarry Smith 60695373324SBarry Smith /* copy over the A part */ 607ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6082ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 60995373324SBarry Smith row = aij->rstart; 610dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 61195373324SBarry Smith for ( i=0; i<m; i++ ) { 612416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 61395373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 61495373324SBarry Smith } 6152ee70a88SLois Curfman McInnes aj = Aloc->j; 616dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 61795373324SBarry Smith 61895373324SBarry Smith /* copy over the B part */ 619ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6202ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62195373324SBarry Smith row = aij->rstart; 6220452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 623dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 62495373324SBarry Smith for ( i=0; i<m; i++ ) { 625416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 62695373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 62795373324SBarry Smith } 6280452661fSBarry Smith PetscFree(ct); 62978b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 63078b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 63117699dbbSLois Curfman McInnes if (!rank) { 63278b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 63395373324SBarry Smith } 63478b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 63595373324SBarry Smith } 63695373324SBarry Smith } 6371eb62cbbSBarry Smith return 0; 6381eb62cbbSBarry Smith } 6391eb62cbbSBarry Smith 640416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 641416022c9SBarry Smith { 642416022c9SBarry Smith Mat mat = (Mat) obj; 643416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 644416022c9SBarry Smith int ierr; 645416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 646416022c9SBarry Smith 64748d91487SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 648416022c9SBarry Smith if (!viewer) { 649416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 650416022c9SBarry Smith } 651416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 652416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 653d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 654416022c9SBarry Smith } 655416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 656d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 657d7e8b826SBarry Smith } 658d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 659d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 660416022c9SBarry Smith } 661416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 662d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 663416022c9SBarry Smith } 664416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 665416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 666416022c9SBarry Smith } 667416022c9SBarry Smith return 0; 668416022c9SBarry Smith } 669416022c9SBarry Smith 670ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6711eb62cbbSBarry Smith /* 6721eb62cbbSBarry Smith This has to provide several versions. 6731eb62cbbSBarry Smith 6741eb62cbbSBarry Smith 1) per sequential 6751eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6761eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 677d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6781eb62cbbSBarry Smith */ 679fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 680dbb450caSBarry Smith double fshift,int its,Vec xx) 6818a729477SBarry Smith { 68244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 683d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 684ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6856abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6866abc6512SBarry Smith int ierr,*idx, *diag; 687416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 688da3a660dSBarry Smith Vec tt; 6898a729477SBarry Smith 69048d91487SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 6911eb62cbbSBarry Smith 692d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 693dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 694dbb450caSBarry Smith ls = ls + shift; 695ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 696d6dfbf8fSBarry Smith diag = A->diag; 697acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 69848d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 699acb40c82SBarry Smith } 700da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 701da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 702da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 703da3a660dSBarry Smith 704da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 705da3a660dSBarry Smith 706da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 707da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 708da3a660dSBarry Smith is the relaxation factor. 709da3a660dSBarry Smith */ 71078b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 711464493b3SBarry Smith PLogObjectParent(matin,tt); 712da3a660dSBarry Smith VecGetArray(tt,&t); 713da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 714da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 715da3a660dSBarry Smith VecSet(&zero,mat->lvec); 71664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 71778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 718da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 719da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 720dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 721dbb450caSBarry Smith v = A->a + diag[i] + !shift; 722da3a660dSBarry Smith sum = b[i]; 723da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 724dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 725da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 726dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 727dbb450caSBarry Smith v = B->a + B->i[i] + shift; 728da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 729da3a660dSBarry Smith x[i] = omega*(sum/d); 730da3a660dSBarry Smith } 73164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 73278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 733da3a660dSBarry Smith 734da3a660dSBarry Smith /* t = b - (2*E - D)x */ 735da3a660dSBarry Smith v = A->a; 736dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 737da3a660dSBarry Smith 738da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 739dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 740da3a660dSBarry Smith diag = A->diag; 741da3a660dSBarry Smith VecSet(&zero,mat->lvec); 74264119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 74378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 744da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 745da3a660dSBarry Smith n = diag[i] - A->i[i]; 746dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 747dbb450caSBarry Smith v = A->a + A->i[i] + shift; 748da3a660dSBarry Smith sum = t[i]; 749da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 750dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 751da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 752dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 753dbb450caSBarry Smith v = B->a + B->i[i] + shift; 754da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 755da3a660dSBarry Smith t[i] = omega*(sum/d); 756da3a660dSBarry Smith } 75764119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 759da3a660dSBarry Smith /* x = x + t */ 760da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 761da3a660dSBarry Smith VecDestroy(tt); 762da3a660dSBarry Smith return 0; 763da3a660dSBarry Smith } 764da3a660dSBarry Smith 7651eb62cbbSBarry Smith 766d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 767da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 768da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 769da3a660dSBarry Smith } 770da3a660dSBarry Smith else { 77164119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 77278b31e54SBarry Smith CHKERRQ(ierr); 77364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 77478b31e54SBarry Smith CHKERRQ(ierr); 775da3a660dSBarry Smith } 776d6dfbf8fSBarry Smith while (its--) { 777d6dfbf8fSBarry Smith /* go down through the rows */ 77864119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 77978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 780d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 781d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 782dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 783dbb450caSBarry Smith v = A->a + A->i[i] + shift; 784d6dfbf8fSBarry Smith sum = b[i]; 785d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 786dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 787d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 788dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 789dbb450caSBarry Smith v = B->a + B->i[i] + shift; 790d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 791d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 792d6dfbf8fSBarry Smith } 79364119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 79478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 795d6dfbf8fSBarry Smith /* come up through the rows */ 79664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 79778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 798d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 799d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 800dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 801dbb450caSBarry Smith v = A->a + A->i[i] + shift; 802d6dfbf8fSBarry Smith sum = b[i]; 803d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 804dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 805d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 806dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 807dbb450caSBarry Smith v = B->a + B->i[i] + shift; 808d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 809d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 810d6dfbf8fSBarry Smith } 81164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 81278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 813d6dfbf8fSBarry Smith } 814d6dfbf8fSBarry Smith } 815d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 816da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 817da3a660dSBarry Smith VecSet(&zero,mat->lvec); 81864119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 81978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 820da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 821da3a660dSBarry Smith n = diag[i] - A->i[i]; 822dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 823dbb450caSBarry Smith v = A->a + A->i[i] + shift; 824da3a660dSBarry Smith sum = b[i]; 825da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 826dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 827da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 828dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 829dbb450caSBarry Smith v = B->a + B->i[i] + shift; 830da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 831da3a660dSBarry Smith x[i] = omega*(sum/d); 832da3a660dSBarry Smith } 83364119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 83478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 835da3a660dSBarry Smith its--; 836da3a660dSBarry Smith } 837d6dfbf8fSBarry Smith while (its--) { 83864119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 83978b31e54SBarry Smith CHKERRQ(ierr); 84064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84178b31e54SBarry Smith CHKERRQ(ierr); 84264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 844d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 845d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 846dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 847dbb450caSBarry Smith v = A->a + A->i[i] + shift; 848d6dfbf8fSBarry Smith sum = b[i]; 849d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 850dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 851d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 852dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 853dbb450caSBarry Smith v = B->a + B->i[i] + shift; 854d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 855d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 856d6dfbf8fSBarry Smith } 85764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 859d6dfbf8fSBarry Smith } 860d6dfbf8fSBarry Smith } 861d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 862da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 863da3a660dSBarry Smith VecSet(&zero,mat->lvec); 86464119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 86578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 866da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 867da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 868dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 869dbb450caSBarry Smith v = A->a + diag[i] + !shift; 870da3a660dSBarry Smith sum = b[i]; 871da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 872dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 873da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 874dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 875dbb450caSBarry Smith v = B->a + B->i[i] + shift; 876da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 877da3a660dSBarry Smith x[i] = omega*(sum/d); 878da3a660dSBarry Smith } 87964119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 881da3a660dSBarry Smith its--; 882da3a660dSBarry Smith } 883d6dfbf8fSBarry Smith while (its--) { 88464119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 88578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 88664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 88778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 88864119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 890d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 891d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 892dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 893dbb450caSBarry Smith v = A->a + A->i[i] + shift; 894d6dfbf8fSBarry Smith sum = b[i]; 895d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 896dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 897d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 898dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 899dbb450caSBarry Smith v = B->a + B->i[i] + shift; 900d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 901d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 902d6dfbf8fSBarry Smith } 90364119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 90478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 905d6dfbf8fSBarry Smith } 906d6dfbf8fSBarry Smith } 907d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 908da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 909dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 910da3a660dSBarry Smith } 91164119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 91278b31e54SBarry Smith CHKERRQ(ierr); 91364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 91478b31e54SBarry Smith CHKERRQ(ierr); 915d6dfbf8fSBarry Smith while (its--) { 916d6dfbf8fSBarry Smith /* go down through the rows */ 917d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 918d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 919dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 920dbb450caSBarry Smith v = A->a + A->i[i] + shift; 921d6dfbf8fSBarry Smith sum = b[i]; 922d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 923dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 924d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 925dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 926dbb450caSBarry Smith v = B->a + B->i[i] + shift; 927d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 928d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 929d6dfbf8fSBarry Smith } 930d6dfbf8fSBarry Smith /* come up through the rows */ 931d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 932d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 933dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 934dbb450caSBarry Smith v = A->a + A->i[i] + shift; 935d6dfbf8fSBarry Smith sum = b[i]; 936d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 937dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 938d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 939dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 940dbb450caSBarry Smith v = B->a + B->i[i] + shift; 941d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 942d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 943d6dfbf8fSBarry Smith } 944d6dfbf8fSBarry Smith } 945d6dfbf8fSBarry Smith } 946d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 947da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 948dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 949da3a660dSBarry Smith } 95064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 95178b31e54SBarry Smith CHKERRQ(ierr); 95264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 95378b31e54SBarry Smith CHKERRQ(ierr); 954d6dfbf8fSBarry Smith while (its--) { 955d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 956d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 957dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 958dbb450caSBarry Smith v = A->a + A->i[i] + shift; 959d6dfbf8fSBarry Smith sum = b[i]; 960d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 961dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 962d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 963dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 964dbb450caSBarry Smith v = B->a + B->i[i] + shift; 965d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 966d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 967d6dfbf8fSBarry Smith } 968d6dfbf8fSBarry Smith } 969d6dfbf8fSBarry Smith } 970d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 971da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 972dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 973da3a660dSBarry Smith } 97464119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 97578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 97664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 97778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 978d6dfbf8fSBarry Smith while (its--) { 979d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 980d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 981dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 982dbb450caSBarry Smith v = A->a + A->i[i] + shift; 983d6dfbf8fSBarry Smith sum = b[i]; 984d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 985dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 986d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 987dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 988dbb450caSBarry Smith v = B->a + B->i[i] + shift; 989d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 990d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 991d6dfbf8fSBarry Smith } 992d6dfbf8fSBarry Smith } 993d6dfbf8fSBarry Smith } 9948a729477SBarry Smith return 0; 9958a729477SBarry Smith } 996a66be287SLois Curfman McInnes 997fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 998fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 999a66be287SLois Curfman McInnes { 1000a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1001a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1002a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1003a66be287SLois Curfman McInnes 100478b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 100578b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1006a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1007a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1008a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1009a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1010d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1011a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1012a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1013d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1014a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1015a66be287SLois Curfman McInnes } 1016a66be287SLois Curfman McInnes return 0; 1017a66be287SLois Curfman McInnes } 1018a66be287SLois Curfman McInnes 1019299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1020299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1021299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1022299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1023299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1024299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1025299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1026299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1027299609e3SLois Curfman McInnes 1028416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1029c74985f6SBarry Smith { 1030c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1031c74985f6SBarry Smith 1032c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1033c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1034c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1035c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1036c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1037c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1038c74985f6SBarry Smith } 1039c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1040c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1041c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1042c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1043c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1044bbb6d6a8SBarry Smith else if (op == COLUMN_ORIENTED) 10454d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 1046c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10474d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1048c0bbcb79SLois Curfman McInnes else 10494d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1050c74985f6SBarry Smith return 0; 1051c74985f6SBarry Smith } 1052c74985f6SBarry Smith 1053d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1054c74985f6SBarry Smith { 105544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1056c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1057c74985f6SBarry Smith return 0; 1058c74985f6SBarry Smith } 1059c74985f6SBarry Smith 1060d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1061c74985f6SBarry Smith { 106244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1063b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1064c74985f6SBarry Smith return 0; 1065c74985f6SBarry Smith } 1066c74985f6SBarry Smith 1067d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1068c74985f6SBarry Smith { 106944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1070c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1071c74985f6SBarry Smith return 0; 1072c74985f6SBarry Smith } 1073c74985f6SBarry Smith 107439e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 107539e00950SLois Curfman McInnes { 1076154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1077154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1078154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1079154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 108039e00950SLois Curfman McInnes 1081*b49de8d1SLois Curfman McInnes if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix"); 1082*b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1083abc0e9e4SLois Curfman McInnes lrow = row - rstart; 108439e00950SLois Curfman McInnes 1085154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1086154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1087154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 108878b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 108978b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1090154123eaSLois Curfman McInnes nztot = nzA + nzB; 1091154123eaSLois Curfman McInnes 1092154123eaSLois Curfman McInnes if (v || idx) { 1093154123eaSLois Curfman McInnes if (nztot) { 1094154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1095299609e3SLois Curfman McInnes int imark; 109648b35521SBarry Smith if (mat->assembled) { 1097154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 109848b35521SBarry Smith } 1099154123eaSLois Curfman McInnes if (v) { 11000452661fSBarry Smith *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 110139e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1102154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1103154123eaSLois Curfman McInnes else break; 1104154123eaSLois Curfman McInnes } 1105154123eaSLois Curfman McInnes imark = i; 1106154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1107299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1108154123eaSLois Curfman McInnes } 1109154123eaSLois Curfman McInnes if (idx) { 11100452661fSBarry Smith *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1111154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1112154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1113154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1114154123eaSLois Curfman McInnes else break; 1115154123eaSLois Curfman McInnes } 1116154123eaSLois Curfman McInnes imark = i; 1117154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1118299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 111939e00950SLois Curfman McInnes } 112039e00950SLois Curfman McInnes } 112139e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1122154123eaSLois Curfman McInnes } 112339e00950SLois Curfman McInnes *nz = nztot; 112478b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 112578b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 112639e00950SLois Curfman McInnes return 0; 112739e00950SLois Curfman McInnes } 112839e00950SLois Curfman McInnes 112939e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 113039e00950SLois Curfman McInnes { 11310452661fSBarry Smith if (idx) PetscFree(*idx); 11320452661fSBarry Smith if (v) PetscFree(*v); 113339e00950SLois Curfman McInnes return 0; 113439e00950SLois Curfman McInnes } 113539e00950SLois Curfman McInnes 1136cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1137855ac2c5SLois Curfman McInnes { 1138855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1139ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1140416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1141416022c9SBarry Smith double sum = 0.0; 114204ca555eSLois Curfman McInnes Scalar *v; 114304ca555eSLois Curfman McInnes 1144416022c9SBarry Smith if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 114517699dbbSLois Curfman McInnes if (aij->size == 1) { 114614183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 114737fa93a5SLois Curfman McInnes } else { 114804ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 114904ca555eSLois Curfman McInnes v = amat->a; 115004ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 115104ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 115204ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 115304ca555eSLois Curfman McInnes #else 115404ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 115504ca555eSLois Curfman McInnes #endif 115604ca555eSLois Curfman McInnes } 115704ca555eSLois Curfman McInnes v = bmat->a; 115804ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116104ca555eSLois Curfman McInnes #else 116204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 116304ca555eSLois Curfman McInnes #endif 116404ca555eSLois Curfman McInnes } 116504ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 116604ca555eSLois Curfman McInnes *norm = sqrt(*norm); 116704ca555eSLois Curfman McInnes } 116804ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 116904ca555eSLois Curfman McInnes double *tmp, *tmp2; 117004ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11710452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11720452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1173cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 117404ca555eSLois Curfman McInnes *norm = 0.0; 117504ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 117604ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1177579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 117804ca555eSLois Curfman McInnes } 117904ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 118004ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1181579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 118204ca555eSLois Curfman McInnes } 118304ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 118404ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 118504ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 118604ca555eSLois Curfman McInnes } 11870452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 118804ca555eSLois Curfman McInnes } 118904ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1190515d9167SLois Curfman McInnes double ntemp = 0.0; 119104ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1192dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 119304ca555eSLois Curfman McInnes sum = 0.0; 119404ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1195cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 119604ca555eSLois Curfman McInnes } 1197dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 119804ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1199cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120004ca555eSLois Curfman McInnes } 1201515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 120204ca555eSLois Curfman McInnes } 1203515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 120404ca555eSLois Curfman McInnes } 120504ca555eSLois Curfman McInnes else { 1206515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 120704ca555eSLois Curfman McInnes } 120837fa93a5SLois Curfman McInnes } 1209855ac2c5SLois Curfman McInnes return 0; 1210855ac2c5SLois Curfman McInnes } 1211855ac2c5SLois Curfman McInnes 12120de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1213b7c46309SBarry Smith { 1214b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1215dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1216416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1217416022c9SBarry Smith Mat B; 1218b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1219b7c46309SBarry Smith Scalar *array; 1220b7c46309SBarry Smith 1221416022c9SBarry Smith if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1222b7c46309SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1223b7c46309SBarry Smith CHKERRQ(ierr); 1224b7c46309SBarry Smith 1225b7c46309SBarry Smith /* copy over the A part */ 1226ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1227b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1228b7c46309SBarry Smith row = a->rstart; 1229dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1230b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1231416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1232b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1233b7c46309SBarry Smith } 1234b7c46309SBarry Smith aj = Aloc->j; 1235dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1236b7c46309SBarry Smith 1237b7c46309SBarry Smith /* copy over the B part */ 1238ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1239b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1240b7c46309SBarry Smith row = a->rstart; 12410452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1242dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1243b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1244416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1245b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1246b7c46309SBarry Smith } 12470452661fSBarry Smith PetscFree(ct); 1248b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1249b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12500de55854SLois Curfman McInnes if (matout) { 12510de55854SLois Curfman McInnes *matout = B; 12520de55854SLois Curfman McInnes } else { 12530de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12540452661fSBarry Smith PetscFree(a->rowners); 12550de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12560de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12570452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12580452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12590de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1260a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12610452661fSBarry Smith PetscFree(a); 1262416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12630452661fSBarry Smith PetscHeaderDestroy(B); 12640de55854SLois Curfman McInnes } 1265b7c46309SBarry Smith return 0; 1266b7c46309SBarry Smith } 1267b7c46309SBarry Smith 1268fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1269a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int); 1270d6dfbf8fSBarry Smith 12718a729477SBarry Smith /* -------------------------------------------------------------------*/ 12722ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 127339e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 127444a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 127544a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1276299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1277299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1278299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 127944a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1280b7c46309SBarry Smith MatTranspose_MPIAIJ, 1281a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1282855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1283ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12841eb62cbbSBarry Smith 0, 1285299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1286299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1287299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1288d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1289299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1290*b49de8d1SLois Curfman McInnes 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ,0,0, 1291*b49de8d1SLois Curfman McInnes 0,0,0, 1292*b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIAIJ}; 12938a729477SBarry Smith 12941987afe7SBarry Smith /*@C 1295ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1296ff756334SLois Curfman McInnes (the default parallel PETSc format). 12978a729477SBarry Smith 12988a729477SBarry Smith Input Parameters: 12991eb62cbbSBarry Smith . comm - MPI communicator 13007d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13017d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13027d3e4905SLois Curfman McInnes if N is given) 13037d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13047d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13057d3e4905SLois Curfman McInnes if n is given) 1306ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1307ff756334SLois Curfman McInnes (same for all local rows) 1308ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1309ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1310ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1311ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1312ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1313ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1314ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13158a729477SBarry Smith 1316ff756334SLois Curfman McInnes Output Parameter: 13178a729477SBarry Smith . newmat - the matrix 13188a729477SBarry Smith 1319ff756334SLois Curfman McInnes Notes: 1320ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1321ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13220002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13230002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1324ff756334SLois Curfman McInnes 1325ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1326ff756334SLois Curfman McInnes (possibly both). 1327ff756334SLois Curfman McInnes 1328e0245417SLois Curfman McInnes Storage Information: 1329e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1330e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1331e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1332e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1333e0245417SLois Curfman McInnes 1334e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1335e0245417SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1336e0245417SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1337e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1338e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1339ff756334SLois Curfman McInnes 1340dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1341ff756334SLois Curfman McInnes 1342fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13438a729477SBarry Smith @*/ 13441eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13451eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 13468a729477SBarry Smith { 13478a729477SBarry Smith Mat mat; 1348416022c9SBarry Smith Mat_MPIAIJ *a; 13496abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1350416022c9SBarry Smith 13518a729477SBarry Smith *newmat = 0; 13520452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1353a5a9c739SBarry Smith PLogObjectCreate(mat); 13540452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1355416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 135644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 135744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 13588a729477SBarry Smith mat->factor = 0; 1359d6dfbf8fSBarry Smith 136064119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 136117699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 136217699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 13631eb62cbbSBarry Smith 13641987afe7SBarry Smith if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 13651987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 13661987afe7SBarry Smith 1367dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13681eb62cbbSBarry Smith work[0] = m; work[1] = n; 1369d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1370dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1371dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13721eb62cbbSBarry Smith } 137317699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 137417699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1375416022c9SBarry Smith a->m = m; 1376416022c9SBarry Smith a->n = n; 1377416022c9SBarry Smith a->N = N; 1378416022c9SBarry Smith a->M = M; 13791eb62cbbSBarry Smith 13801eb62cbbSBarry Smith /* build local table of row and column ownerships */ 13810452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1382579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 138317699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1384416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1385416022c9SBarry Smith a->rowners[0] = 0; 138617699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1387416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 13888a729477SBarry Smith } 138917699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 139017699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1391416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1392416022c9SBarry Smith a->cowners[0] = 0; 139317699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1394416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 13958a729477SBarry Smith } 139617699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 139717699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 13988a729477SBarry Smith 1399416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1400416022c9SBarry Smith PLogObjectParent(mat,a->A); 1401416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1402416022c9SBarry Smith PLogObjectParent(mat,a->B); 14038a729477SBarry Smith 14041eb62cbbSBarry Smith /* build cache for off array entries formed */ 1405416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1406416022c9SBarry Smith a->colmap = 0; 1407416022c9SBarry Smith a->garray = 0; 14088a729477SBarry Smith 14091eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1410416022c9SBarry Smith a->lvec = 0; 1411416022c9SBarry Smith a->Mvctx = 0; 1412416022c9SBarry Smith a->assembled = 0; 14138a729477SBarry Smith 1414d6dfbf8fSBarry Smith *newmat = mat; 1415d6dfbf8fSBarry Smith return 0; 1416d6dfbf8fSBarry Smith } 1417c74985f6SBarry Smith 1418a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1419d6dfbf8fSBarry Smith { 1420d6dfbf8fSBarry Smith Mat mat; 1421416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 14222ee70a88SLois Curfman McInnes int ierr, len; 1423d6dfbf8fSBarry Smith 1424416022c9SBarry Smith if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1425416022c9SBarry Smith *newmat = 0; 14260452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1427a5a9c739SBarry Smith PLogObjectCreate(mat); 14280452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1429416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 143044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 143144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1432d6dfbf8fSBarry Smith mat->factor = matin->factor; 1433d6dfbf8fSBarry Smith 1434416022c9SBarry Smith a->m = oldmat->m; 1435416022c9SBarry Smith a->n = oldmat->n; 1436416022c9SBarry Smith a->M = oldmat->M; 1437416022c9SBarry Smith a->N = oldmat->N; 1438d6dfbf8fSBarry Smith 1439416022c9SBarry Smith a->assembled = 1; 1440416022c9SBarry Smith a->rstart = oldmat->rstart; 1441416022c9SBarry Smith a->rend = oldmat->rend; 1442416022c9SBarry Smith a->cstart = oldmat->cstart; 1443416022c9SBarry Smith a->cend = oldmat->cend; 144417699dbbSLois Curfman McInnes a->size = oldmat->size; 144517699dbbSLois Curfman McInnes a->rank = oldmat->rank; 144664119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1447d6dfbf8fSBarry Smith 14480452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 144917699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 145017699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1451416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14522ee70a88SLois Curfman McInnes if (oldmat->colmap) { 14530452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1454416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1455416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1456416022c9SBarry Smith } else a->colmap = 0; 1457ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 14580452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1459464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1460416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1461416022c9SBarry Smith } else a->garray = 0; 1462d6dfbf8fSBarry Smith 1463416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1464416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1465a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1466416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1467416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1468416022c9SBarry Smith PLogObjectParent(mat,a->A); 1469416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1470416022c9SBarry Smith PLogObjectParent(mat,a->B); 14718a729477SBarry Smith *newmat = mat; 14728a729477SBarry Smith return 0; 14738a729477SBarry Smith } 1474416022c9SBarry Smith 1475416022c9SBarry Smith #include "sysio.h" 1476416022c9SBarry Smith 147702834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1478416022c9SBarry Smith { 1479d65a2f8fSBarry Smith Mat A; 1480416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1481d65a2f8fSBarry Smith Scalar *vals,*svals; 1482416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1483416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1484416022c9SBarry Smith MPI_Status status; 148517699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1486d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1487d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1488416022c9SBarry Smith 148917699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 149017699dbbSLois Curfman McInnes if (!rank) { 1491416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1492416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 149302834360SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1494416022c9SBarry Smith } 1495416022c9SBarry Smith 1496416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1497416022c9SBarry Smith M = header[1]; N = header[2]; 1498416022c9SBarry Smith /* determine ownership of all rows */ 149917699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15000452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1501416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1502416022c9SBarry Smith rowners[0] = 0; 150317699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1504416022c9SBarry Smith rowners[i] += rowners[i-1]; 1505416022c9SBarry Smith } 150617699dbbSLois Curfman McInnes rstart = rowners[rank]; 150717699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1508416022c9SBarry Smith 1509416022c9SBarry Smith /* distribute row lengths to all processors */ 15100452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1511416022c9SBarry Smith offlens = ourlens + (rend-rstart); 151217699dbbSLois Curfman McInnes if (!rank) { 15130452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1514416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 15150452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 151617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1517d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15180452661fSBarry Smith PetscFree(sndcounts); 1519416022c9SBarry Smith } 1520416022c9SBarry Smith else { 1521416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1522416022c9SBarry Smith } 1523416022c9SBarry Smith 152417699dbbSLois Curfman McInnes if (!rank) { 1525416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15260452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1527cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 152817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1529416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1530416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1531416022c9SBarry Smith } 1532416022c9SBarry Smith } 15330452661fSBarry Smith PetscFree(rowlengths); 1534416022c9SBarry Smith 1535416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1536416022c9SBarry Smith maxnz = 0; 153717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15380452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1539416022c9SBarry Smith } 15400452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1541416022c9SBarry Smith 1542416022c9SBarry Smith /* read in my part of the matrix column indices */ 1543416022c9SBarry Smith nz = procsnz[0]; 15440452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1545d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1546d65a2f8fSBarry Smith 1547d65a2f8fSBarry Smith /* read in every one elses and ship off */ 154817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1549d65a2f8fSBarry Smith nz = procsnz[i]; 1550416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1551d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1552d65a2f8fSBarry Smith } 15530452661fSBarry Smith PetscFree(cols); 1554416022c9SBarry Smith } 1555416022c9SBarry Smith else { 1556416022c9SBarry Smith /* determine buffer space needed for message */ 1557416022c9SBarry Smith nz = 0; 1558416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1559416022c9SBarry Smith nz += ourlens[i]; 1560416022c9SBarry Smith } 15610452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1562416022c9SBarry Smith 1563416022c9SBarry Smith /* receive message of column indices*/ 1564d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1565416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 156602834360SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1567416022c9SBarry Smith } 1568416022c9SBarry Smith 1569416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1570cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1571416022c9SBarry Smith jj = 0; 1572416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1573416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1574d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1575416022c9SBarry Smith jj++; 1576416022c9SBarry Smith } 1577416022c9SBarry Smith } 1578d65a2f8fSBarry Smith 1579d65a2f8fSBarry Smith /* create our matrix */ 1580416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1581416022c9SBarry Smith ourlens[i] -= offlens[i]; 1582416022c9SBarry Smith } 1583d65a2f8fSBarry Smith if (type == MATMPIAIJ) { 1584d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1585d65a2f8fSBarry Smith } 1586d65a2f8fSBarry Smith else if (type == MATMPIROW) { 1587d65a2f8fSBarry Smith ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1588d65a2f8fSBarry Smith } 1589d65a2f8fSBarry Smith A = *newmat; 1590d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1591d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1592d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1593d65a2f8fSBarry Smith } 1594416022c9SBarry Smith 159517699dbbSLois Curfman McInnes if (!rank) { 15960452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1597416022c9SBarry Smith 1598416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1599416022c9SBarry Smith nz = procsnz[0]; 1600416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1601d65a2f8fSBarry Smith 1602d65a2f8fSBarry Smith /* insert into matrix */ 1603d65a2f8fSBarry Smith jj = rstart; 1604d65a2f8fSBarry Smith smycols = mycols; 1605d65a2f8fSBarry Smith svals = vals; 1606d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1607d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1608d65a2f8fSBarry Smith smycols += ourlens[i]; 1609d65a2f8fSBarry Smith svals += ourlens[i]; 1610d65a2f8fSBarry Smith jj++; 1611416022c9SBarry Smith } 1612416022c9SBarry Smith 1613d65a2f8fSBarry Smith /* read in other processors and ship out */ 161417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1615416022c9SBarry Smith nz = procsnz[i]; 1616416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1617d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1618416022c9SBarry Smith } 16190452661fSBarry Smith PetscFree(procsnz); 1620416022c9SBarry Smith } 1621d65a2f8fSBarry Smith else { 1622d65a2f8fSBarry Smith /* receive numeric values */ 16230452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1624416022c9SBarry Smith 1625d65a2f8fSBarry Smith /* receive message of values*/ 1626d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1627d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 162802834360SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1629d65a2f8fSBarry Smith 1630d65a2f8fSBarry Smith /* insert into matrix */ 1631d65a2f8fSBarry Smith jj = rstart; 1632d65a2f8fSBarry Smith smycols = mycols; 1633d65a2f8fSBarry Smith svals = vals; 1634d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1635d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1636d65a2f8fSBarry Smith smycols += ourlens[i]; 1637d65a2f8fSBarry Smith svals += ourlens[i]; 1638d65a2f8fSBarry Smith jj++; 1639d65a2f8fSBarry Smith } 1640d65a2f8fSBarry Smith } 16410452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1642d65a2f8fSBarry Smith 1643d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1644d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1645416022c9SBarry Smith return 0; 1646416022c9SBarry Smith } 1647