1cb512458SBarry Smith #ifndef lint 2*a56f8943SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.92 1995/10/20 01:59:55 curfman Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 2078b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22416022c9SBarry Smith PetscZero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 392ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 401eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 43dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46dbb450caSBarry Smith int shift = C->indexshift; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 53bbb6d6a8SBarry Smith if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54bbb6d6a8SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 551eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 561eb62cbbSBarry Smith row = idxm[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 58bbb6d6a8SBarry Smith if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59bbb6d6a8SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 601eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 611eb62cbbSBarry Smith col = idxn[j] - cstart; 6278b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 631eb62cbbSBarry Smith } 641eb62cbbSBarry Smith else { 65d6dfbf8fSBarry Smith if (aij->assembled) { 66b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 67dbb450caSBarry Smith col = aij->colmap[idxn[j]] + shift; 68ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 692493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 702493cbb0SBarry Smith col = idxn[j]; 71d6dfbf8fSBarry Smith } 729e25ed09SBarry Smith } 739e25ed09SBarry Smith else col = idxn[j]; 7478b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 751eb62cbbSBarry Smith } 761eb62cbbSBarry Smith } 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith else { 79416022c9SBarry Smith ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 801eb62cbbSBarry Smith } 818a729477SBarry Smith } 828a729477SBarry Smith return 0; 838a729477SBarry Smith } 848a729477SBarry Smith 85fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 868a729477SBarry Smith { 8744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 88d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 8917699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 9017699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 911eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 926abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 931eb62cbbSBarry Smith InsertMode addv; 941eb62cbbSBarry Smith Scalar *rvalues,*svalues; 951eb62cbbSBarry Smith 961eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 97d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 98dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 99bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1001eb62cbbSBarry Smith } 1011eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1021eb62cbbSBarry Smith 1031eb62cbbSBarry Smith /* first count number of contributors to each processor */ 10417699dbbSLois Curfman McInnes nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 10517699dbbSLois Curfman McInnes PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 10678b31e54SBarry Smith owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1071eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1081eb62cbbSBarry Smith idx = aij->stash.idx[i]; 10917699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1101eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1111eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1128a729477SBarry Smith } 1138a729477SBarry Smith } 1148a729477SBarry Smith } 11517699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1161eb62cbbSBarry Smith 1171eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 11817699dbbSLois Curfman McInnes work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 11917699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 12017699dbbSLois Curfman McInnes nreceives = work[rank]; 12117699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 12217699dbbSLois Curfman McInnes nmax = work[rank]; 12378b31e54SBarry Smith PETSCFREE(work); 1241eb62cbbSBarry Smith 1251eb62cbbSBarry Smith /* post receives: 1261eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1271eb62cbbSBarry Smith (global index,value) we store the global index as a double 128d6dfbf8fSBarry Smith to simplify the message passing. 1291eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1301eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1311eb62cbbSBarry Smith this is a lot of wasted space. 1321eb62cbbSBarry Smith 1331eb62cbbSBarry Smith 1341eb62cbbSBarry Smith This could be done better. 1351eb62cbbSBarry Smith */ 13678b31e54SBarry Smith rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 13778b31e54SBarry Smith CHKPTRQ(rvalues); 13878b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 13978b31e54SBarry Smith CHKPTRQ(recv_waits); 1401eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 141416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1421eb62cbbSBarry Smith comm,recv_waits+i); 1431eb62cbbSBarry Smith } 1441eb62cbbSBarry Smith 1451eb62cbbSBarry Smith /* do sends: 1461eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1471eb62cbbSBarry Smith the ith processor 1481eb62cbbSBarry Smith */ 149416022c9SBarry Smith svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 15078b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 15178b31e54SBarry Smith CHKPTRQ(send_waits); 15217699dbbSLois Curfman McInnes starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts); 1531eb62cbbSBarry Smith starts[0] = 0; 15417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1551eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1561eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1571eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1581eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1591eb62cbbSBarry Smith } 16078b31e54SBarry Smith PETSCFREE(owner); 1611eb62cbbSBarry Smith starts[0] = 0; 16217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1631eb62cbbSBarry Smith count = 0; 16417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1651eb62cbbSBarry Smith if (procs[i]) { 166416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 1671eb62cbbSBarry Smith comm,send_waits+count++); 1681eb62cbbSBarry Smith } 1691eb62cbbSBarry Smith } 17078b31e54SBarry Smith PETSCFREE(starts); PETSCFREE(nprocs); 1711eb62cbbSBarry Smith 1721eb62cbbSBarry Smith /* Free cache space */ 17378b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 1741eb62cbbSBarry Smith 1751eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1761eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1771eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1781eb62cbbSBarry Smith aij->rmax = nmax; 1791eb62cbbSBarry Smith 1801eb62cbbSBarry Smith return 0; 1811eb62cbbSBarry Smith } 18244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 1831eb62cbbSBarry Smith 184fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 1851eb62cbbSBarry Smith { 18644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 187dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 1881eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 189416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 190416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 1911eb62cbbSBarry Smith Scalar *values,val; 1921eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 1931eb62cbbSBarry Smith 1941eb62cbbSBarry Smith /* wait on receives */ 1951eb62cbbSBarry Smith while (count) { 196d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 1971eb62cbbSBarry Smith /* unpack receives into our local space */ 198d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 199c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2001eb62cbbSBarry Smith n = n/3; 2011eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2021eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2031eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2041eb62cbbSBarry Smith val = values[3*i+2]; 2051eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2061eb62cbbSBarry Smith col -= aij->cstart; 2071eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2081eb62cbbSBarry Smith } 2091eb62cbbSBarry Smith else { 210d6dfbf8fSBarry Smith if (aij->assembled) { 211b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 212dbb450caSBarry Smith col = aij->colmap[col] + shift; 213ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2142493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2152493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 216d6dfbf8fSBarry Smith } 2179e25ed09SBarry Smith } 2181eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2191eb62cbbSBarry Smith } 2201eb62cbbSBarry Smith } 2211eb62cbbSBarry Smith count--; 2221eb62cbbSBarry Smith } 22378b31e54SBarry Smith PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 2241eb62cbbSBarry Smith 2251eb62cbbSBarry Smith /* wait on sends */ 2261eb62cbbSBarry Smith if (aij->nsends) { 22778b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status)); 22878b31e54SBarry Smith CHKPTRQ(send_status); 2291eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 23078b31e54SBarry Smith PETSCFREE(send_status); 2311eb62cbbSBarry Smith } 23278b31e54SBarry Smith PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 2331eb62cbbSBarry Smith 23464119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 23578b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 23678b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2371eb62cbbSBarry Smith 2382493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2392493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 240416022c9SBarry Smith MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 2412493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2422493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2432493cbb0SBarry Smith } 2442493cbb0SBarry Smith 2455e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 24678b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 247d6dfbf8fSBarry Smith aij->assembled = 1; 2485e42470aSBarry Smith } 24978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 25078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2515e42470aSBarry Smith 2528a729477SBarry Smith return 0; 2538a729477SBarry Smith } 2548a729477SBarry Smith 2552ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2561eb62cbbSBarry Smith { 25744a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 258dbd7a890SLois Curfman McInnes int ierr; 25978b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 26078b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2611eb62cbbSBarry Smith return 0; 2621eb62cbbSBarry Smith } 2631eb62cbbSBarry Smith 2641eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2651eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2661eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2671eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2681eb62cbbSBarry Smith routine. 2691eb62cbbSBarry Smith */ 27044a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 2711eb62cbbSBarry Smith { 27244a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 27317699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2746abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 27517699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2765392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 277d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 278d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2791eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2801eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2811eb62cbbSBarry Smith IS istmp; 2821eb62cbbSBarry Smith 28348d91487SBarry Smith if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 28478b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 28578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2861eb62cbbSBarry Smith 2871eb62cbbSBarry Smith /* first count number of contributors to each processor */ 28817699dbbSLois Curfman McInnes nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 28917699dbbSLois Curfman McInnes PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 29078b31e54SBarry Smith owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2911eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 2921eb62cbbSBarry Smith idx = rows[i]; 2931eb62cbbSBarry Smith found = 0; 29417699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 2951eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 2961eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2971eb62cbbSBarry Smith } 2981eb62cbbSBarry Smith } 299bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3001eb62cbbSBarry Smith } 30117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3021eb62cbbSBarry Smith 3031eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 30417699dbbSLois Curfman McInnes work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 30517699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 30617699dbbSLois Curfman McInnes nrecvs = work[rank]; 30717699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 30817699dbbSLois Curfman McInnes nmax = work[rank]; 30978b31e54SBarry Smith PETSCFREE(work); 3101eb62cbbSBarry Smith 3111eb62cbbSBarry Smith /* post receives: */ 31278b31e54SBarry Smith rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 31378b31e54SBarry Smith CHKPTRQ(rvalues); 31478b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 31578b31e54SBarry Smith CHKPTRQ(recv_waits); 3161eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 317416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3181eb62cbbSBarry Smith } 3191eb62cbbSBarry Smith 3201eb62cbbSBarry Smith /* do sends: 3211eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3221eb62cbbSBarry Smith the ith processor 3231eb62cbbSBarry Smith */ 32478b31e54SBarry Smith svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 32578b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 32678b31e54SBarry Smith CHKPTRQ(send_waits); 32717699dbbSLois Curfman McInnes starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3281eb62cbbSBarry Smith starts[0] = 0; 32917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3301eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3311eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3321eb62cbbSBarry Smith } 3331eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3341eb62cbbSBarry Smith 3351eb62cbbSBarry Smith starts[0] = 0; 33617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3371eb62cbbSBarry Smith count = 0; 33817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3391eb62cbbSBarry Smith if (procs[i]) { 340416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3411eb62cbbSBarry Smith } 3421eb62cbbSBarry Smith } 34378b31e54SBarry Smith PETSCFREE(starts); 3441eb62cbbSBarry Smith 34517699dbbSLois Curfman McInnes base = owners[rank]; 3461eb62cbbSBarry Smith 3471eb62cbbSBarry Smith /* wait on receives */ 34878b31e54SBarry Smith lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3491eb62cbbSBarry Smith source = lens + nrecvs; 3501eb62cbbSBarry Smith count = nrecvs; slen = 0; 3511eb62cbbSBarry Smith while (count) { 352d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3531eb62cbbSBarry Smith /* unpack receives into our local space */ 3541eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 355d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 356d6dfbf8fSBarry Smith lens[imdex] = n; 3571eb62cbbSBarry Smith slen += n; 3581eb62cbbSBarry Smith count--; 3591eb62cbbSBarry Smith } 36078b31e54SBarry Smith PETSCFREE(recv_waits); 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* move the data into the send scatter */ 36378b31e54SBarry Smith lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3641eb62cbbSBarry Smith count = 0; 3651eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3661eb62cbbSBarry Smith values = rvalues + i*nmax; 3671eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3681eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3691eb62cbbSBarry Smith } 3701eb62cbbSBarry Smith } 37178b31e54SBarry Smith PETSCFREE(rvalues); PETSCFREE(lens); 37278b31e54SBarry Smith PETSCFREE(owner); PETSCFREE(nprocs); 3731eb62cbbSBarry Smith 3741eb62cbbSBarry Smith /* actually zap the local rows */ 375416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 376464493b3SBarry Smith PLogObjectParent(A,istmp); 377416022c9SBarry Smith PETSCFREE(lrows); 37878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 37978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 38078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 3811eb62cbbSBarry Smith 3821eb62cbbSBarry Smith /* wait on sends */ 3831eb62cbbSBarry Smith if (nsends) { 38478b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status)); 38578b31e54SBarry Smith CHKPTRQ(send_status); 3861eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 38778b31e54SBarry Smith PETSCFREE(send_status); 3881eb62cbbSBarry Smith } 38978b31e54SBarry Smith PETSCFREE(send_waits); PETSCFREE(svalues); 3901eb62cbbSBarry Smith 3911eb62cbbSBarry Smith return 0; 3921eb62cbbSBarry Smith } 3931eb62cbbSBarry Smith 394416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 3951eb62cbbSBarry Smith { 396416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 3971eb62cbbSBarry Smith int ierr; 398416022c9SBarry Smith 39948d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 40064119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 401416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 40264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 403416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4041eb62cbbSBarry Smith return 0; 4051eb62cbbSBarry Smith } 4061eb62cbbSBarry Smith 407416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 408da3a660dSBarry Smith { 409416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 410da3a660dSBarry Smith int ierr; 41148d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 41264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 413416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 41464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 415416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 416da3a660dSBarry Smith return 0; 417da3a660dSBarry Smith } 418da3a660dSBarry Smith 419416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 420da3a660dSBarry Smith { 421416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 422da3a660dSBarry Smith int ierr; 423da3a660dSBarry Smith 42448d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 425da3a660dSBarry Smith /* do nondiagonal part */ 426416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 427da3a660dSBarry Smith /* send it on its way */ 428416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 42964119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 430da3a660dSBarry Smith /* do local part */ 431416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 432da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 433da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 434da3a660dSBarry Smith /* but is not perhaps always true. */ 435416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 43664119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 437da3a660dSBarry Smith return 0; 438da3a660dSBarry Smith } 439da3a660dSBarry Smith 440416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 441da3a660dSBarry Smith { 442416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 443da3a660dSBarry Smith int ierr; 444da3a660dSBarry Smith 44548d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 446da3a660dSBarry Smith /* do nondiagonal part */ 447416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 448da3a660dSBarry Smith /* send it on its way */ 449416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 45064119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 451da3a660dSBarry Smith /* do local part */ 452416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 453da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 454da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 455da3a660dSBarry Smith /* but is not perhaps always true. */ 456416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 45764119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 458da3a660dSBarry Smith return 0; 459da3a660dSBarry Smith } 460da3a660dSBarry Smith 4611eb62cbbSBarry Smith /* 4621eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4631eb62cbbSBarry Smith diagonal block 4641eb62cbbSBarry Smith */ 465416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 4661eb62cbbSBarry Smith { 467416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 46848d91487SBarry Smith if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 469416022c9SBarry Smith return MatGetDiagonal(a->A,v); 4701eb62cbbSBarry Smith } 4711eb62cbbSBarry Smith 47244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 4731eb62cbbSBarry Smith { 4741eb62cbbSBarry Smith Mat mat = (Mat) obj; 47544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4761eb62cbbSBarry Smith int ierr; 477a5a9c739SBarry Smith #if defined(PETSC_LOG) 4786e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 479a5a9c739SBarry Smith #endif 48078b31e54SBarry Smith PETSCFREE(aij->rowners); 48178b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 48278b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 48378b31e54SBarry Smith if (aij->colmap) PETSCFREE(aij->colmap); 48478b31e54SBarry Smith if (aij->garray) PETSCFREE(aij->garray); 4851eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 486*a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 48778b31e54SBarry Smith PETSCFREE(aij); 488a5a9c739SBarry Smith PLogObjectDestroy(mat); 489a5a9c739SBarry Smith PETSCHEADERDESTROY(mat); 4901eb62cbbSBarry Smith return 0; 4911eb62cbbSBarry Smith } 4927857610eSBarry Smith #include "draw.h" 493b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 494ee50ffe9SBarry Smith 495416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 4961eb62cbbSBarry Smith { 497416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 498416022c9SBarry Smith int ierr; 499416022c9SBarry Smith 50017699dbbSLois Curfman McInnes if (aij->size == 1) { 501416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 502416022c9SBarry Smith } 503a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 504416022c9SBarry Smith return 0; 505416022c9SBarry Smith } 506416022c9SBarry Smith 507416022c9SBarry Smith static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer) 508416022c9SBarry Smith { 50944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 510dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 511*a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 512d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 513d636dbe3SBarry Smith FILE *fd; 514416022c9SBarry Smith 515416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 516416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 517416022c9SBarry Smith if (format == FILE_FORMAT_INFO) { 518*a56f8943SBarry Smith int nz,nzalloc,mem; 519*a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 520*a56f8943SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 521*a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 522*a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 523*a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 524*a56f8943SBarry Smith nzalloc,mem); 525*a56f8943SBarry Smith fflush(fd); 526*a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 527*a56f8943SBarry Smith ierr = VecScatterView(aij->Mvctx,viewer); 528416022c9SBarry Smith return 0; 529416022c9SBarry Smith } 530416022c9SBarry Smith } 531416022c9SBarry Smith 532416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 533d636dbe3SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 5347f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 535d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 53617699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5371eb62cbbSBarry Smith aij->cend); 53878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 53978b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 540d13ab20cSBarry Smith fflush(fd); 5417f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 542d13ab20cSBarry Smith } 543416022c9SBarry Smith else { 544*a56f8943SBarry Smith int size = aij->size; 545*a56f8943SBarry Smith rank = aij->rank; 54617699dbbSLois Curfman McInnes if (size == 1) { 54778b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 54895373324SBarry Smith } 54995373324SBarry Smith else { 55095373324SBarry Smith /* assemble the entire matrix onto first processor. */ 55195373324SBarry Smith Mat A; 552ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 5532eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 55495373324SBarry Smith Scalar *a; 5552ee70a88SLois Curfman McInnes 55617699dbbSLois Curfman McInnes if (!rank) { 557416022c9SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 55895373324SBarry Smith } 55995373324SBarry Smith else { 560416022c9SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 56195373324SBarry Smith } 562464493b3SBarry Smith PLogObjectParent(mat,A); 563416022c9SBarry Smith 56495373324SBarry Smith 56595373324SBarry Smith /* copy over the A part */ 566ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 5672ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 56895373324SBarry Smith row = aij->rstart; 569dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 57095373324SBarry Smith for ( i=0; i<m; i++ ) { 571416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 57295373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 57395373324SBarry Smith } 5742ee70a88SLois Curfman McInnes aj = Aloc->j; 575dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 57695373324SBarry Smith 57795373324SBarry Smith /* copy over the B part */ 578ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 5792ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 58095373324SBarry Smith row = aij->rstart; 58178b31e54SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 582dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 58395373324SBarry Smith for ( i=0; i<m; i++ ) { 584416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 58595373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 58695373324SBarry Smith } 58778b31e54SBarry Smith PETSCFREE(ct); 58878b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58978b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 59017699dbbSLois Curfman McInnes if (!rank) { 59178b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 59295373324SBarry Smith } 59378b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 59495373324SBarry Smith } 59595373324SBarry Smith } 5961eb62cbbSBarry Smith return 0; 5971eb62cbbSBarry Smith } 5981eb62cbbSBarry Smith 599416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 600416022c9SBarry Smith { 601416022c9SBarry Smith Mat mat = (Mat) obj; 602416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 603416022c9SBarry Smith int ierr; 604416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 605416022c9SBarry Smith 60648d91487SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 607416022c9SBarry Smith if (!viewer) { 608416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 609416022c9SBarry Smith } 610416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 611416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 612416022c9SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 613416022c9SBarry Smith } 614416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 615416022c9SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 616416022c9SBarry Smith } 617416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 618416022c9SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 619416022c9SBarry Smith } 620416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 621416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 622416022c9SBarry Smith } 623416022c9SBarry Smith return 0; 624416022c9SBarry Smith } 625416022c9SBarry Smith 626ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6271eb62cbbSBarry Smith /* 6281eb62cbbSBarry Smith This has to provide several versions. 6291eb62cbbSBarry Smith 6301eb62cbbSBarry Smith 1) per sequential 6311eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6321eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 633d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6341eb62cbbSBarry Smith */ 635fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 636dbb450caSBarry Smith double fshift,int its,Vec xx) 6378a729477SBarry Smith { 63844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 639d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 640ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6416abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6426abc6512SBarry Smith int ierr,*idx, *diag; 643416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 644da3a660dSBarry Smith Vec tt; 6458a729477SBarry Smith 64648d91487SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 6471eb62cbbSBarry Smith 648d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 649dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 650dbb450caSBarry Smith ls = ls + shift; 651ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 652d6dfbf8fSBarry Smith diag = A->diag; 653acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 65448d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 655acb40c82SBarry Smith } 656da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 657da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 658da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 659da3a660dSBarry Smith 660da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 661da3a660dSBarry Smith 662da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 663da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 664da3a660dSBarry Smith is the relaxation factor. 665da3a660dSBarry Smith */ 66678b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 667464493b3SBarry Smith PLogObjectParent(matin,tt); 668da3a660dSBarry Smith VecGetArray(tt,&t); 669da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 670da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 671da3a660dSBarry Smith VecSet(&zero,mat->lvec); 67264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 67378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 674da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 675da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 676dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 677dbb450caSBarry Smith v = A->a + diag[i] + !shift; 678da3a660dSBarry Smith sum = b[i]; 679da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 680dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 681da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 682dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 683dbb450caSBarry Smith v = B->a + B->i[i] + shift; 684da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 685da3a660dSBarry Smith x[i] = omega*(sum/d); 686da3a660dSBarry Smith } 68764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 68878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 689da3a660dSBarry Smith 690da3a660dSBarry Smith /* t = b - (2*E - D)x */ 691da3a660dSBarry Smith v = A->a; 692dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 693da3a660dSBarry Smith 694da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 695dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 696da3a660dSBarry Smith diag = A->diag; 697da3a660dSBarry Smith VecSet(&zero,mat->lvec); 69864119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 69978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 700da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 701da3a660dSBarry Smith n = diag[i] - A->i[i]; 702dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 703dbb450caSBarry Smith v = A->a + A->i[i] + shift; 704da3a660dSBarry Smith sum = t[i]; 705da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 706dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 707da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 708dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 709dbb450caSBarry Smith v = B->a + B->i[i] + shift; 710da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 711da3a660dSBarry Smith t[i] = omega*(sum/d); 712da3a660dSBarry Smith } 71364119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 71478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 715da3a660dSBarry Smith /* x = x + t */ 716da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 717da3a660dSBarry Smith VecDestroy(tt); 718da3a660dSBarry Smith return 0; 719da3a660dSBarry Smith } 720da3a660dSBarry Smith 7211eb62cbbSBarry Smith 722d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 723da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 724da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 725da3a660dSBarry Smith } 726da3a660dSBarry Smith else { 72764119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 72878b31e54SBarry Smith CHKERRQ(ierr); 72964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 73078b31e54SBarry Smith CHKERRQ(ierr); 731da3a660dSBarry Smith } 732d6dfbf8fSBarry Smith while (its--) { 733d6dfbf8fSBarry Smith /* go down through the rows */ 73464119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 73578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 736d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 737d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 738dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 739dbb450caSBarry Smith v = A->a + A->i[i] + shift; 740d6dfbf8fSBarry Smith sum = b[i]; 741d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 742dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 743d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 744dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 745dbb450caSBarry Smith v = B->a + B->i[i] + shift; 746d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 747d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 748d6dfbf8fSBarry Smith } 74964119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 751d6dfbf8fSBarry Smith /* come up through the rows */ 75264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 75378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 754d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 755d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 756dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 757dbb450caSBarry Smith v = A->a + A->i[i] + shift; 758d6dfbf8fSBarry Smith sum = b[i]; 759d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 760dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 761d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 762dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 763dbb450caSBarry Smith v = B->a + B->i[i] + shift; 764d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 765d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 766d6dfbf8fSBarry Smith } 76764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 76878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 769d6dfbf8fSBarry Smith } 770d6dfbf8fSBarry Smith } 771d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 772da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 773da3a660dSBarry Smith VecSet(&zero,mat->lvec); 77464119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 77578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 776da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 777da3a660dSBarry Smith n = diag[i] - A->i[i]; 778dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 779dbb450caSBarry Smith v = A->a + A->i[i] + shift; 780da3a660dSBarry Smith sum = b[i]; 781da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 782dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 783da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 784dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 785dbb450caSBarry Smith v = B->a + B->i[i] + shift; 786da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 787da3a660dSBarry Smith x[i] = omega*(sum/d); 788da3a660dSBarry Smith } 78964119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 79078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 791da3a660dSBarry Smith its--; 792da3a660dSBarry Smith } 793d6dfbf8fSBarry Smith while (its--) { 79464119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 79578b31e54SBarry Smith CHKERRQ(ierr); 79664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 79778b31e54SBarry Smith CHKERRQ(ierr); 79864119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 79978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 800d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 801d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 802dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 803dbb450caSBarry Smith v = A->a + A->i[i] + shift; 804d6dfbf8fSBarry Smith sum = b[i]; 805d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 806dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 807d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 808dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 809dbb450caSBarry Smith v = B->a + B->i[i] + shift; 810d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 811d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 812d6dfbf8fSBarry Smith } 81364119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 81478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 815d6dfbf8fSBarry Smith } 816d6dfbf8fSBarry Smith } 817d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 818da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 819da3a660dSBarry Smith VecSet(&zero,mat->lvec); 82064119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 822da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 823da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 824dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 825dbb450caSBarry Smith v = A->a + diag[i] + !shift; 826da3a660dSBarry Smith sum = b[i]; 827da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 828dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 829da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 830dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 831dbb450caSBarry Smith v = B->a + B->i[i] + shift; 832da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 833da3a660dSBarry Smith x[i] = omega*(sum/d); 834da3a660dSBarry Smith } 83564119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 83678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 837da3a660dSBarry Smith its--; 838da3a660dSBarry Smith } 839d6dfbf8fSBarry Smith while (its--) { 84064119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 84178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 84264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 84378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 84464119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 84578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 846d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 847d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 848dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 849dbb450caSBarry Smith v = A->a + A->i[i] + shift; 850d6dfbf8fSBarry Smith sum = b[i]; 851d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 852dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 853d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 854dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 855dbb450caSBarry Smith v = B->a + B->i[i] + shift; 856d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 857d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 858d6dfbf8fSBarry Smith } 85964119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 86078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 861d6dfbf8fSBarry Smith } 862d6dfbf8fSBarry Smith } 863d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 864da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 865dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 866da3a660dSBarry Smith } 86764119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 86878b31e54SBarry Smith CHKERRQ(ierr); 86964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 87078b31e54SBarry Smith CHKERRQ(ierr); 871d6dfbf8fSBarry Smith while (its--) { 872d6dfbf8fSBarry Smith /* go down through the rows */ 873d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 874d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 875dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 876dbb450caSBarry Smith v = A->a + A->i[i] + shift; 877d6dfbf8fSBarry Smith sum = b[i]; 878d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 879dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 880d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 881dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 882dbb450caSBarry Smith v = B->a + B->i[i] + shift; 883d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 884d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 885d6dfbf8fSBarry Smith } 886d6dfbf8fSBarry Smith /* come up through the rows */ 887d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 888d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 889dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 890dbb450caSBarry Smith v = A->a + A->i[i] + shift; 891d6dfbf8fSBarry Smith sum = b[i]; 892d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 893dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 894d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 895dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 896dbb450caSBarry Smith v = B->a + B->i[i] + shift; 897d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 898d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 899d6dfbf8fSBarry Smith } 900d6dfbf8fSBarry Smith } 901d6dfbf8fSBarry Smith } 902d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 903da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 904dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 905da3a660dSBarry Smith } 90664119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 90778b31e54SBarry Smith CHKERRQ(ierr); 90864119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 90978b31e54SBarry Smith CHKERRQ(ierr); 910d6dfbf8fSBarry Smith while (its--) { 911d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 912d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 913dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 914dbb450caSBarry Smith v = A->a + A->i[i] + shift; 915d6dfbf8fSBarry Smith sum = b[i]; 916d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 917dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 918d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 919dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 920dbb450caSBarry Smith v = B->a + B->i[i] + shift; 921d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 922d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 923d6dfbf8fSBarry Smith } 924d6dfbf8fSBarry Smith } 925d6dfbf8fSBarry Smith } 926d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 927da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 928dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 929da3a660dSBarry Smith } 93064119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 93178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 93264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 93378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 934d6dfbf8fSBarry Smith while (its--) { 935d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 936d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 937dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 938dbb450caSBarry Smith v = A->a + A->i[i] + shift; 939d6dfbf8fSBarry Smith sum = b[i]; 940d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 941dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 942d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 943dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 944dbb450caSBarry Smith v = B->a + B->i[i] + shift; 945d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 946d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 947d6dfbf8fSBarry Smith } 948d6dfbf8fSBarry Smith } 949d6dfbf8fSBarry Smith } 9508a729477SBarry Smith return 0; 9518a729477SBarry Smith } 952a66be287SLois Curfman McInnes 953fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 954fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 955a66be287SLois Curfman McInnes { 956a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 957a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 958a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 959a66be287SLois Curfman McInnes 96078b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 96178b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 962a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 963a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 964a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 965a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 966d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 967a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 968a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 969d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 970a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 971a66be287SLois Curfman McInnes } 972a66be287SLois Curfman McInnes return 0; 973a66be287SLois Curfman McInnes } 974a66be287SLois Curfman McInnes 975299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 976299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 977299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 978299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 979299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 980299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 981299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 982299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 983299609e3SLois Curfman McInnes 984416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 985c74985f6SBarry Smith { 986c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 987c74985f6SBarry Smith 988c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 989c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 990c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 991c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 992c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 993c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 994c74985f6SBarry Smith } 995c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 996c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 997c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 998c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 999c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1000bbb6d6a8SBarry Smith else if (op == COLUMN_ORIENTED) 10014d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 1002c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10034d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1004c0bbcb79SLois Curfman McInnes else 10054d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1006c74985f6SBarry Smith return 0; 1007c74985f6SBarry Smith } 1008c74985f6SBarry Smith 1009d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1010c74985f6SBarry Smith { 101144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1012c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1013c74985f6SBarry Smith return 0; 1014c74985f6SBarry Smith } 1015c74985f6SBarry Smith 1016d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1017c74985f6SBarry Smith { 101844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1019b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1020c74985f6SBarry Smith return 0; 1021c74985f6SBarry Smith } 1022c74985f6SBarry Smith 1023d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1024c74985f6SBarry Smith { 102544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1026c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1027c74985f6SBarry Smith return 0; 1028c74985f6SBarry Smith } 1029c74985f6SBarry Smith 103039e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 103139e00950SLois Curfman McInnes { 1032154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1033154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1034154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1035154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 103639e00950SLois Curfman McInnes 1037416022c9SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows") 1038abc0e9e4SLois Curfman McInnes lrow = row - rstart; 103939e00950SLois Curfman McInnes 1040154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1041154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1042154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 104378b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 104478b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1045154123eaSLois Curfman McInnes nztot = nzA + nzB; 1046154123eaSLois Curfman McInnes 1047154123eaSLois Curfman McInnes if (v || idx) { 1048154123eaSLois Curfman McInnes if (nztot) { 1049154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1050299609e3SLois Curfman McInnes int imark; 105148b35521SBarry Smith if (mat->assembled) { 1052154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 105348b35521SBarry Smith } 1054154123eaSLois Curfman McInnes if (v) { 105578b31e54SBarry Smith *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 105639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1057154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1058154123eaSLois Curfman McInnes else break; 1059154123eaSLois Curfman McInnes } 1060154123eaSLois Curfman McInnes imark = i; 1061154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1062299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1063154123eaSLois Curfman McInnes } 1064154123eaSLois Curfman McInnes if (idx) { 106578b31e54SBarry Smith *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1066154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1067154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1068154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1069154123eaSLois Curfman McInnes else break; 1070154123eaSLois Curfman McInnes } 1071154123eaSLois Curfman McInnes imark = i; 1072154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1073299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 107439e00950SLois Curfman McInnes } 107539e00950SLois Curfman McInnes } 107639e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1077154123eaSLois Curfman McInnes } 107839e00950SLois Curfman McInnes *nz = nztot; 107978b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 108078b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 108139e00950SLois Curfman McInnes return 0; 108239e00950SLois Curfman McInnes } 108339e00950SLois Curfman McInnes 108439e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 108539e00950SLois Curfman McInnes { 108678b31e54SBarry Smith if (idx) PETSCFREE(*idx); 108778b31e54SBarry Smith if (v) PETSCFREE(*v); 108839e00950SLois Curfman McInnes return 0; 108939e00950SLois Curfman McInnes } 109039e00950SLois Curfman McInnes 1091855ac2c5SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1092855ac2c5SLois Curfman McInnes { 1093855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1094ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1095416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1096416022c9SBarry Smith double sum = 0.0; 109704ca555eSLois Curfman McInnes Scalar *v; 109804ca555eSLois Curfman McInnes 1099416022c9SBarry Smith if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 110017699dbbSLois Curfman McInnes if (aij->size == 1) { 110114183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 110237fa93a5SLois Curfman McInnes } else { 110304ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 110404ca555eSLois Curfman McInnes v = amat->a; 110504ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 110604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 110704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 110804ca555eSLois Curfman McInnes #else 110904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 111004ca555eSLois Curfman McInnes #endif 111104ca555eSLois Curfman McInnes } 111204ca555eSLois Curfman McInnes v = bmat->a; 111304ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 111404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 111504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 111604ca555eSLois Curfman McInnes #else 111704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 111804ca555eSLois Curfman McInnes #endif 111904ca555eSLois Curfman McInnes } 112004ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 112104ca555eSLois Curfman McInnes *norm = sqrt(*norm); 112204ca555eSLois Curfman McInnes } 112304ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 112404ca555eSLois Curfman McInnes double *tmp, *tmp2; 112504ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 112604ca555eSLois Curfman McInnes tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp); 112704ca555eSLois Curfman McInnes tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1128416022c9SBarry Smith PetscZero(tmp,aij->N*sizeof(double)); 112904ca555eSLois Curfman McInnes *norm = 0.0; 113004ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 113104ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 113204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1133dbb450caSBarry Smith tmp[cstart + *jj++ + shift] += abs(*v++); 113404ca555eSLois Curfman McInnes #else 1135dbb450caSBarry Smith tmp[cstart + *jj++ + shift] += fabs(*v++); 113604ca555eSLois Curfman McInnes #endif 113704ca555eSLois Curfman McInnes } 113804ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 113904ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 114004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1141dbb450caSBarry Smith tmp[garray[*jj++ + shift]] += abs(*v++); 114204ca555eSLois Curfman McInnes #else 1143dbb450caSBarry Smith tmp[garray[*jj++ + shift]] += fabs(*v++); 114404ca555eSLois Curfman McInnes #endif 114504ca555eSLois Curfman McInnes } 114604ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 114704ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 114804ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 114904ca555eSLois Curfman McInnes } 115004ca555eSLois Curfman McInnes PETSCFREE(tmp); PETSCFREE(tmp2); 115104ca555eSLois Curfman McInnes } 115204ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 115304ca555eSLois Curfman McInnes double normtemp = 0.0; 115404ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1155dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 115604ca555eSLois Curfman McInnes sum = 0.0; 115704ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 115804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 115904ca555eSLois Curfman McInnes sum += abs(*v); v++; 116004ca555eSLois Curfman McInnes #else 116104ca555eSLois Curfman McInnes sum += fabs(*v); v++; 116204ca555eSLois Curfman McInnes #endif 116304ca555eSLois Curfman McInnes } 1164dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 116504ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 116604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116704ca555eSLois Curfman McInnes sum += abs(*v); v++; 116804ca555eSLois Curfman McInnes #else 116904ca555eSLois Curfman McInnes sum += fabs(*v); v++; 117004ca555eSLois Curfman McInnes #endif 117104ca555eSLois Curfman McInnes } 117204ca555eSLois Curfman McInnes if (sum > normtemp) normtemp = sum; 117304ca555eSLois Curfman McInnes MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 117404ca555eSLois Curfman McInnes } 117504ca555eSLois Curfman McInnes } 117604ca555eSLois Curfman McInnes else { 117748d91487SBarry Smith SETERRQ(1,"MatNorm_MPIRow:No support for two norm"); 117804ca555eSLois Curfman McInnes } 117937fa93a5SLois Curfman McInnes } 1180855ac2c5SLois Curfman McInnes return 0; 1181855ac2c5SLois Curfman McInnes } 1182855ac2c5SLois Curfman McInnes 11830de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1184b7c46309SBarry Smith { 1185b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1186dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1187416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1188416022c9SBarry Smith Mat B; 1189b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1190b7c46309SBarry Smith Scalar *array; 1191b7c46309SBarry Smith 1192416022c9SBarry Smith if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1193b7c46309SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1194b7c46309SBarry Smith CHKERRQ(ierr); 1195b7c46309SBarry Smith 1196b7c46309SBarry Smith /* copy over the A part */ 1197ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1198b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1199b7c46309SBarry Smith row = a->rstart; 1200dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1201b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1202416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1203b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1204b7c46309SBarry Smith } 1205b7c46309SBarry Smith aj = Aloc->j; 1206dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1207b7c46309SBarry Smith 1208b7c46309SBarry Smith /* copy over the B part */ 1209ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1210b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1211b7c46309SBarry Smith row = a->rstart; 12121987afe7SBarry Smith ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1213dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1214b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1215416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1216b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1217b7c46309SBarry Smith } 1218b7c46309SBarry Smith PETSCFREE(ct); 1219b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1220b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12210de55854SLois Curfman McInnes if (matout) { 12220de55854SLois Curfman McInnes *matout = B; 12230de55854SLois Curfman McInnes } else { 12240de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12250de55854SLois Curfman McInnes PETSCFREE(a->rowners); 12260de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12270de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12280de55854SLois Curfman McInnes if (a->colmap) PETSCFREE(a->colmap); 12290de55854SLois Curfman McInnes if (a->garray) PETSCFREE(a->garray); 12300de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1231*a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12320de55854SLois Curfman McInnes PETSCFREE(a); 1233416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12340de55854SLois Curfman McInnes PETSCHEADERDESTROY(B); 12350de55854SLois Curfman McInnes } 1236b7c46309SBarry Smith return 0; 1237b7c46309SBarry Smith } 1238b7c46309SBarry Smith 1239fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1240*a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int); 1241d6dfbf8fSBarry Smith 12428a729477SBarry Smith /* -------------------------------------------------------------------*/ 12432ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 124439e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 124544a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 124644a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1247299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1248299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1249299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 125044a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1251b7c46309SBarry Smith MatTranspose_MPIAIJ, 1252a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1253855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1254ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12551eb62cbbSBarry Smith 0, 1256299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1257299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1258299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1259d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1260299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1261ff7509f2SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 12628a729477SBarry Smith 12631987afe7SBarry Smith /*@C 1264ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1265ff756334SLois Curfman McInnes (the default parallel PETSc format). 12668a729477SBarry Smith 12678a729477SBarry Smith Input Parameters: 12681eb62cbbSBarry Smith . comm - MPI communicator 12697d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 12707d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 12717d3e4905SLois Curfman McInnes if N is given) 12727d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 12737d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 12747d3e4905SLois Curfman McInnes if n is given) 1275ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1276ff756334SLois Curfman McInnes (same for all local rows) 1277ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1278ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1279ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1280ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1281ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1282ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1283ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 12848a729477SBarry Smith 1285ff756334SLois Curfman McInnes Output Parameter: 12868a729477SBarry Smith . newmat - the matrix 12878a729477SBarry Smith 1288ff756334SLois Curfman McInnes Notes: 1289ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1290ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12910002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12920002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1293ff756334SLois Curfman McInnes 1294ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1295ff756334SLois Curfman McInnes (possibly both). 1296ff756334SLois Curfman McInnes 1297e0245417SLois Curfman McInnes Storage Information: 1298e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1299e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1300e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1301e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1302e0245417SLois Curfman McInnes 1303e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1304e0245417SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1305e0245417SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1306e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1307e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1308ff756334SLois Curfman McInnes 1309dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1310ff756334SLois Curfman McInnes 1311fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13128a729477SBarry Smith @*/ 13131eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13141eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 13158a729477SBarry Smith { 13168a729477SBarry Smith Mat mat; 1317416022c9SBarry Smith Mat_MPIAIJ *a; 13186abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1319416022c9SBarry Smith 13208a729477SBarry Smith *newmat = 0; 132144a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1322a5a9c739SBarry Smith PLogObjectCreate(mat); 1323416022c9SBarry Smith mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1324416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 132544a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 132644a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 13278a729477SBarry Smith mat->factor = 0; 1328d6dfbf8fSBarry Smith 132964119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 133017699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 133117699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 13321eb62cbbSBarry Smith 13331987afe7SBarry Smith if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 13341987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 13351987afe7SBarry Smith 1336dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13371eb62cbbSBarry Smith work[0] = m; work[1] = n; 1338d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1339dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1340dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13411eb62cbbSBarry Smith } 134217699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 134317699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1344416022c9SBarry Smith a->m = m; 1345416022c9SBarry Smith a->n = n; 1346416022c9SBarry Smith a->N = N; 1347416022c9SBarry Smith a->M = M; 13481eb62cbbSBarry Smith 13491eb62cbbSBarry Smith /* build local table of row and column ownerships */ 135017699dbbSLois Curfman McInnes a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 135117699dbbSLois Curfman McInnes PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 1352464493b3SBarry Smith sizeof(Mat_MPIAIJ)); 135317699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1354416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1355416022c9SBarry Smith a->rowners[0] = 0; 135617699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1357416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 13588a729477SBarry Smith } 135917699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 136017699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1361416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1362416022c9SBarry Smith a->cowners[0] = 0; 136317699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1364416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 13658a729477SBarry Smith } 136617699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 136717699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 13688a729477SBarry Smith 1369416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1370416022c9SBarry Smith PLogObjectParent(mat,a->A); 1371416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1372416022c9SBarry Smith PLogObjectParent(mat,a->B); 13738a729477SBarry Smith 13741eb62cbbSBarry Smith /* build cache for off array entries formed */ 1375416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1376416022c9SBarry Smith a->colmap = 0; 1377416022c9SBarry Smith a->garray = 0; 13788a729477SBarry Smith 13791eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1380416022c9SBarry Smith a->lvec = 0; 1381416022c9SBarry Smith a->Mvctx = 0; 1382416022c9SBarry Smith a->assembled = 0; 13838a729477SBarry Smith 1384d6dfbf8fSBarry Smith *newmat = mat; 1385d6dfbf8fSBarry Smith return 0; 1386d6dfbf8fSBarry Smith } 1387c74985f6SBarry Smith 1388*a56f8943SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1389d6dfbf8fSBarry Smith { 1390d6dfbf8fSBarry Smith Mat mat; 1391416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 13922ee70a88SLois Curfman McInnes int ierr, len; 1393d6dfbf8fSBarry Smith 1394416022c9SBarry Smith if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1395416022c9SBarry Smith *newmat = 0; 139644a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1397a5a9c739SBarry Smith PLogObjectCreate(mat); 1398416022c9SBarry Smith mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1399416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 140044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 140144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1402d6dfbf8fSBarry Smith mat->factor = matin->factor; 1403d6dfbf8fSBarry Smith 1404416022c9SBarry Smith a->m = oldmat->m; 1405416022c9SBarry Smith a->n = oldmat->n; 1406416022c9SBarry Smith a->M = oldmat->M; 1407416022c9SBarry Smith a->N = oldmat->N; 1408d6dfbf8fSBarry Smith 1409416022c9SBarry Smith a->assembled = 1; 1410416022c9SBarry Smith a->rstart = oldmat->rstart; 1411416022c9SBarry Smith a->rend = oldmat->rend; 1412416022c9SBarry Smith a->cstart = oldmat->cstart; 1413416022c9SBarry Smith a->cend = oldmat->cend; 141417699dbbSLois Curfman McInnes a->size = oldmat->size; 141517699dbbSLois Curfman McInnes a->rank = oldmat->rank; 141664119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1417d6dfbf8fSBarry Smith 141817699dbbSLois Curfman McInnes a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 141917699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 142017699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1421416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14222ee70a88SLois Curfman McInnes if (oldmat->colmap) { 1423416022c9SBarry Smith a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1424416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1425416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1426416022c9SBarry Smith } else a->colmap = 0; 1427ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1428416022c9SBarry Smith a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray); 1429464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1430416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1431416022c9SBarry Smith } else a->garray = 0; 1432d6dfbf8fSBarry Smith 1433416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1434416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1435*a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1436416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1437416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1438416022c9SBarry Smith PLogObjectParent(mat,a->A); 1439416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1440416022c9SBarry Smith PLogObjectParent(mat,a->B); 14418a729477SBarry Smith *newmat = mat; 14428a729477SBarry Smith return 0; 14438a729477SBarry Smith } 1444416022c9SBarry Smith 1445416022c9SBarry Smith #include "sysio.h" 1446416022c9SBarry Smith 144702834360SBarry Smith int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1448416022c9SBarry Smith { 1449d65a2f8fSBarry Smith Mat A; 1450416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1451d65a2f8fSBarry Smith Scalar *vals,*svals; 1452416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1453416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1454416022c9SBarry Smith MPI_Status status; 145517699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1456d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1457d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1458416022c9SBarry Smith 145917699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 146017699dbbSLois Curfman McInnes if (!rank) { 1461416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1462416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 146302834360SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1464416022c9SBarry Smith } 1465416022c9SBarry Smith 1466416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1467416022c9SBarry Smith M = header[1]; N = header[2]; 1468416022c9SBarry Smith /* determine ownership of all rows */ 146917699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 147017699dbbSLois Curfman McInnes rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners); 1471416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1472416022c9SBarry Smith rowners[0] = 0; 147317699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1474416022c9SBarry Smith rowners[i] += rowners[i-1]; 1475416022c9SBarry Smith } 147617699dbbSLois Curfman McInnes rstart = rowners[rank]; 147717699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1478416022c9SBarry Smith 1479416022c9SBarry Smith /* distribute row lengths to all processors */ 1480416022c9SBarry Smith ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1481416022c9SBarry Smith offlens = ourlens + (rend-rstart); 148217699dbbSLois Curfman McInnes if (!rank) { 1483416022c9SBarry Smith rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths); 1484416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 148517699dbbSLois Curfman McInnes sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts); 148617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1487d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 14886ef5f50fSLois Curfman McInnes PETSCFREE(sndcounts); 1489416022c9SBarry Smith } 1490416022c9SBarry Smith else { 1491416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1492416022c9SBarry Smith } 1493416022c9SBarry Smith 149417699dbbSLois Curfman McInnes if (!rank) { 1495416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 149617699dbbSLois Curfman McInnes procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz); 149717699dbbSLois Curfman McInnes PetscZero(procsnz,size*sizeof(int)); 149817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1499416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1500416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1501416022c9SBarry Smith } 1502416022c9SBarry Smith } 15036ef5f50fSLois Curfman McInnes PETSCFREE(rowlengths); 1504416022c9SBarry Smith 1505416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1506416022c9SBarry Smith maxnz = 0; 150717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1508416022c9SBarry Smith maxnz = PETSCMAX(maxnz,procsnz[i]); 1509416022c9SBarry Smith } 1510416022c9SBarry Smith cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols); 1511416022c9SBarry Smith 1512416022c9SBarry Smith /* read in my part of the matrix column indices */ 1513416022c9SBarry Smith nz = procsnz[0]; 1514d65a2f8fSBarry Smith mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1515d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1516d65a2f8fSBarry Smith 1517d65a2f8fSBarry Smith /* read in every one elses and ship off */ 151817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1519d65a2f8fSBarry Smith nz = procsnz[i]; 1520416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1521d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1522d65a2f8fSBarry Smith } 1523d65a2f8fSBarry Smith PETSCFREE(cols); 1524416022c9SBarry Smith } 1525416022c9SBarry Smith else { 1526416022c9SBarry Smith /* determine buffer space needed for message */ 1527416022c9SBarry Smith nz = 0; 1528416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1529416022c9SBarry Smith nz += ourlens[i]; 1530416022c9SBarry Smith } 1531d65a2f8fSBarry Smith mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1532416022c9SBarry Smith 1533416022c9SBarry Smith /* receive message of column indices*/ 1534d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1535416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 153602834360SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1537416022c9SBarry Smith } 1538416022c9SBarry Smith 1539416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1540416022c9SBarry Smith PetscZero(offlens,m*sizeof(int)); 1541416022c9SBarry Smith jj = 0; 1542416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1543416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1544d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1545416022c9SBarry Smith jj++; 1546416022c9SBarry Smith } 1547416022c9SBarry Smith } 1548d65a2f8fSBarry Smith 1549d65a2f8fSBarry Smith /* create our matrix */ 1550416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1551416022c9SBarry Smith ourlens[i] -= offlens[i]; 1552416022c9SBarry Smith } 1553d65a2f8fSBarry Smith if (type == MATMPIAIJ) { 1554d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1555d65a2f8fSBarry Smith } 1556d65a2f8fSBarry Smith else if (type == MATMPIROW) { 1557d65a2f8fSBarry Smith ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1558d65a2f8fSBarry Smith } 1559d65a2f8fSBarry Smith A = *newmat; 1560d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1561d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1562d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1563d65a2f8fSBarry Smith } 1564416022c9SBarry Smith 156517699dbbSLois Curfman McInnes if (!rank) { 1566416022c9SBarry Smith vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1567416022c9SBarry Smith 1568416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1569416022c9SBarry Smith nz = procsnz[0]; 1570416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1571d65a2f8fSBarry Smith 1572d65a2f8fSBarry Smith /* insert into matrix */ 1573d65a2f8fSBarry Smith jj = rstart; 1574d65a2f8fSBarry Smith smycols = mycols; 1575d65a2f8fSBarry Smith svals = vals; 1576d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1577d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1578d65a2f8fSBarry Smith smycols += ourlens[i]; 1579d65a2f8fSBarry Smith svals += ourlens[i]; 1580d65a2f8fSBarry Smith jj++; 1581416022c9SBarry Smith } 1582416022c9SBarry Smith 1583d65a2f8fSBarry Smith /* read in other processors and ship out */ 158417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1585416022c9SBarry Smith nz = procsnz[i]; 1586416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1587d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1588416022c9SBarry Smith } 15896ef5f50fSLois Curfman McInnes PETSCFREE(procsnz); 1590416022c9SBarry Smith } 1591d65a2f8fSBarry Smith else { 1592d65a2f8fSBarry Smith /* receive numeric values */ 1593d65a2f8fSBarry Smith vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1594416022c9SBarry Smith 1595d65a2f8fSBarry Smith /* receive message of values*/ 1596d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1597d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 159802834360SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1599d65a2f8fSBarry Smith 1600d65a2f8fSBarry Smith /* insert into matrix */ 1601d65a2f8fSBarry Smith jj = rstart; 1602d65a2f8fSBarry Smith smycols = mycols; 1603d65a2f8fSBarry Smith svals = vals; 1604d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1605d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1606d65a2f8fSBarry Smith smycols += ourlens[i]; 1607d65a2f8fSBarry Smith svals += ourlens[i]; 1608d65a2f8fSBarry Smith jj++; 1609d65a2f8fSBarry Smith } 1610d65a2f8fSBarry Smith } 16116ef5f50fSLois Curfman McInnes PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners); 1612d65a2f8fSBarry Smith 1613d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1614d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1615416022c9SBarry Smith return 0; 1616416022c9SBarry Smith } 1617