1905e6a2fSBarry Smith 2cb512458SBarry Smith #ifndef lint 3*4e220ebcSLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.164 1996/08/21 19:59:43 bsmith Exp curfman $"; 4cb512458SBarry Smith #endif 58a729477SBarry Smith 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 15905e6a2fSBarry Smith static int CreateColmap_MPIAIJ_Private(Mat mat) 169e25ed09SBarry Smith { 1744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 18ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 19905e6a2fSBarry Smith int n = B->n,i; 20dbb450caSBarry Smith 210452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 22464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 23cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 24905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 259e25ed09SBarry Smith return 0; 269e25ed09SBarry Smith } 279e25ed09SBarry Smith 282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 292493cbb0SBarry Smith 3070423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 3417699dbbSLois Curfman McInnes if (aij->size == 1) { 3551c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3648d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 37299609e3SLois Curfman McInnes return 0; 38299609e3SLois Curfman McInnes } 39299609e3SLois Curfman McInnes 404b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46905e6a2fSBarry Smith int roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67905e6a2fSBarry Smith if (!aij->colmap) { 68905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 69905e6a2fSBarry Smith } 70905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 71ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 722493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 734b0e389bSBarry Smith col = in[j]; 74d6dfbf8fSBarry Smith } 759e25ed09SBarry Smith } 764b0e389bSBarry Smith else col = in[j]; 774b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 784b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith } 811eb62cbbSBarry Smith } 821eb62cbbSBarry Smith else { 834b0e389bSBarry Smith if (roworiented) { 844b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 854b0e389bSBarry Smith } 864b0e389bSBarry Smith else { 874b0e389bSBarry Smith row = im[i]; 884b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 894b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 904b0e389bSBarry Smith } 914b0e389bSBarry Smith } 921eb62cbbSBarry Smith } 938a729477SBarry Smith } 948a729477SBarry Smith return 0; 958a729477SBarry Smith } 968a729477SBarry Smith 97b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 98b49de8d1SLois Curfman McInnes { 99b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 100b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 101b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116905e6a2fSBarry Smith if (!aij->colmap) { 117905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 118905e6a2fSBarry Smith } 119905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 120e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 121d9d09a02SSatish Balay else { 122b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes } 126d9d09a02SSatish Balay } 127b49de8d1SLois Curfman McInnes else { 128b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 129b49de8d1SLois Curfman McInnes } 130b49de8d1SLois Curfman McInnes } 131b49de8d1SLois Curfman McInnes return 0; 132b49de8d1SLois Curfman McInnes } 133b49de8d1SLois Curfman McInnes 134fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1358a729477SBarry Smith { 13644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 137d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13817699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13917699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1401eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1416abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1421eb62cbbSBarry Smith InsertMode addv; 1431eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1441eb62cbbSBarry Smith 1451eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 146d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 147dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 148bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1511eb62cbbSBarry Smith 1521eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1530452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 154cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1550452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1561eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1571eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1591eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1601eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1618a729477SBarry Smith } 1628a729477SBarry Smith } 1638a729477SBarry Smith } 16417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1651eb62cbbSBarry Smith 1661eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1670452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16817699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16917699dbbSLois Curfman McInnes nreceives = work[rank]; 17017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 17117699dbbSLois Curfman McInnes nmax = work[rank]; 1720452661fSBarry Smith PetscFree(work); 1731eb62cbbSBarry Smith 1741eb62cbbSBarry Smith /* post receives: 1751eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1761eb62cbbSBarry Smith (global index,value) we store the global index as a double 177d6dfbf8fSBarry Smith to simplify the message passing. 1781eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1791eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1801eb62cbbSBarry Smith this is a lot of wasted space. 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith 1831eb62cbbSBarry Smith This could be done better. 1841eb62cbbSBarry Smith */ 1850452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18678b31e54SBarry Smith CHKPTRQ(rvalues); 1870452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18878b31e54SBarry Smith CHKPTRQ(recv_waits); 1891eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 190416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1911eb62cbbSBarry Smith comm,recv_waits+i); 1921eb62cbbSBarry Smith } 1931eb62cbbSBarry Smith 1941eb62cbbSBarry Smith /* do sends: 1951eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1961eb62cbbSBarry Smith the ith processor 1971eb62cbbSBarry Smith */ 1980452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1990452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 20078b31e54SBarry Smith CHKPTRQ(send_waits); 2010452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2021eb62cbbSBarry Smith starts[0] = 0; 20317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2041eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2051eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2061eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2071eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2081eb62cbbSBarry Smith } 2090452661fSBarry Smith PetscFree(owner); 2101eb62cbbSBarry Smith starts[0] = 0; 21117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2121eb62cbbSBarry Smith count = 0; 21317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2141eb62cbbSBarry Smith if (procs[i]) { 215416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2161eb62cbbSBarry Smith comm,send_waits+count++); 2171eb62cbbSBarry Smith } 2181eb62cbbSBarry Smith } 2190452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2201eb62cbbSBarry Smith 2211eb62cbbSBarry Smith /* Free cache space */ 2222191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 22378b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2241eb62cbbSBarry Smith 2251eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2261eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2271eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2281eb62cbbSBarry Smith aij->rmax = nmax; 2291eb62cbbSBarry Smith 2301eb62cbbSBarry Smith return 0; 2311eb62cbbSBarry Smith } 23244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2331eb62cbbSBarry Smith 234fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2351eb62cbbSBarry Smith { 23644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2371eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 238416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 239905e6a2fSBarry Smith int row,col,other_disassembled; 2401eb62cbbSBarry Smith Scalar *values,val; 2411eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2421eb62cbbSBarry Smith 2431eb62cbbSBarry Smith /* wait on receives */ 2441eb62cbbSBarry Smith while (count) { 245d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2461eb62cbbSBarry Smith /* unpack receives into our local space */ 247d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 248c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2491eb62cbbSBarry Smith n = n/3; 2501eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 251227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 252227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2531eb62cbbSBarry Smith val = values[3*i+2]; 2541eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2551eb62cbbSBarry Smith col -= aij->cstart; 2561eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2571eb62cbbSBarry Smith } 2581eb62cbbSBarry Smith else { 259227d817aSBarry Smith if (mat->was_assembled) { 260905e6a2fSBarry Smith if (!aij->colmap) { 261905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262905e6a2fSBarry Smith } 263905e6a2fSBarry Smith col = aij->colmap[col] - 1; 264ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2652493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 266227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 267d6dfbf8fSBarry Smith } 2689e25ed09SBarry Smith } 2691eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith } 2721eb62cbbSBarry Smith count--; 2731eb62cbbSBarry Smith } 2740452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* wait on sends */ 2771eb62cbbSBarry Smith if (aij->nsends) { 2780452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27978b31e54SBarry Smith CHKPTRQ(send_status); 2801eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2810452661fSBarry Smith PetscFree(send_status); 2821eb62cbbSBarry Smith } 2830452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2841eb62cbbSBarry Smith 28564119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 28678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2881eb62cbbSBarry Smith 2892493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2902493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 291227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 292227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2932493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2942493cbb0SBarry Smith } 2952493cbb0SBarry Smith 2966d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 29778b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2985e42470aSBarry Smith } 29978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 30078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3015e42470aSBarry Smith 3027a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3038a729477SBarry Smith return 0; 3048a729477SBarry Smith } 3058a729477SBarry Smith 3062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3071eb62cbbSBarry Smith { 30844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 309dbd7a890SLois Curfman McInnes int ierr; 31078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 31178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3121eb62cbbSBarry Smith return 0; 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith 3151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3161eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3171eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3181eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3191eb62cbbSBarry Smith routine. 3201eb62cbbSBarry Smith */ 32144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3221eb62cbbSBarry Smith { 32344a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 32417699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3256abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 32617699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3275392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 328d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 329d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3301eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3311eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3321eb62cbbSBarry Smith IS istmp; 3331eb62cbbSBarry Smith 33477c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 33578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3380452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 339cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3400452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3411eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3421eb62cbbSBarry Smith idx = rows[i]; 3431eb62cbbSBarry Smith found = 0; 34417699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3451eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3461eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3471eb62cbbSBarry Smith } 3481eb62cbbSBarry Smith } 349bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3501eb62cbbSBarry Smith } 35117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3540452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 35517699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 35617699dbbSLois Curfman McInnes nrecvs = work[rank]; 35717699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35817699dbbSLois Curfman McInnes nmax = work[rank]; 3590452661fSBarry Smith PetscFree(work); 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith /* post receives: */ 3620452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 36378b31e54SBarry Smith CHKPTRQ(rvalues); 3640452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 36578b31e54SBarry Smith CHKPTRQ(recv_waits); 3661eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 367416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3681eb62cbbSBarry Smith } 3691eb62cbbSBarry Smith 3701eb62cbbSBarry Smith /* do sends: 3711eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3721eb62cbbSBarry Smith the ith processor 3731eb62cbbSBarry Smith */ 3740452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3750452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 37678b31e54SBarry Smith CHKPTRQ(send_waits); 3770452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3781eb62cbbSBarry Smith starts[0] = 0; 37917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3801eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3811eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3841eb62cbbSBarry Smith 3851eb62cbbSBarry Smith starts[0] = 0; 38617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3871eb62cbbSBarry Smith count = 0; 38817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3891eb62cbbSBarry Smith if (procs[i]) { 390416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3911eb62cbbSBarry Smith } 3921eb62cbbSBarry Smith } 3930452661fSBarry Smith PetscFree(starts); 3941eb62cbbSBarry Smith 39517699dbbSLois Curfman McInnes base = owners[rank]; 3961eb62cbbSBarry Smith 3971eb62cbbSBarry Smith /* wait on receives */ 3980452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3991eb62cbbSBarry Smith source = lens + nrecvs; 4001eb62cbbSBarry Smith count = nrecvs; slen = 0; 4011eb62cbbSBarry Smith while (count) { 402d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4031eb62cbbSBarry Smith /* unpack receives into our local space */ 4041eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 405d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 406d6dfbf8fSBarry Smith lens[imdex] = n; 4071eb62cbbSBarry Smith slen += n; 4081eb62cbbSBarry Smith count--; 4091eb62cbbSBarry Smith } 4100452661fSBarry Smith PetscFree(recv_waits); 4111eb62cbbSBarry Smith 4121eb62cbbSBarry Smith /* move the data into the send scatter */ 4130452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4141eb62cbbSBarry Smith count = 0; 4151eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4161eb62cbbSBarry Smith values = rvalues + i*nmax; 4171eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4181eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4191eb62cbbSBarry Smith } 4201eb62cbbSBarry Smith } 4210452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4220452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* actually zap the local rows */ 425537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 426464493b3SBarry Smith PLogObjectParent(A,istmp); 4270452661fSBarry Smith PetscFree(lrows); 42878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4311eb62cbbSBarry Smith 4321eb62cbbSBarry Smith /* wait on sends */ 4331eb62cbbSBarry Smith if (nsends) { 4340452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 43578b31e54SBarry Smith CHKPTRQ(send_status); 4361eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4370452661fSBarry Smith PetscFree(send_status); 4381eb62cbbSBarry Smith } 4390452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4401eb62cbbSBarry Smith 4411eb62cbbSBarry Smith return 0; 4421eb62cbbSBarry Smith } 4431eb62cbbSBarry Smith 444416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4451eb62cbbSBarry Smith { 446416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 447fbd6ef76SBarry Smith int ierr,nt; 448416022c9SBarry Smith 449a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 450fbd6ef76SBarry Smith if (nt != a->n) { 45147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 452fbd6ef76SBarry Smith } 45364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45438597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 45564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45638597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4571eb62cbbSBarry Smith return 0; 4581eb62cbbSBarry Smith } 4591eb62cbbSBarry Smith 460416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 461da3a660dSBarry Smith { 462416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463da3a660dSBarry Smith int ierr; 46464119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46538597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 46664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46738597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 468da3a660dSBarry Smith return 0; 469da3a660dSBarry Smith } 470da3a660dSBarry Smith 471416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 472da3a660dSBarry Smith { 473416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 474da3a660dSBarry Smith int ierr; 475da3a660dSBarry Smith 476da3a660dSBarry Smith /* do nondiagonal part */ 47738597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 478da3a660dSBarry Smith /* send it on its way */ 479537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 480da3a660dSBarry Smith /* do local part */ 48138597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 482da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 483da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 484da3a660dSBarry Smith /* but is not perhaps always true. */ 485537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 486da3a660dSBarry Smith return 0; 487da3a660dSBarry Smith } 488da3a660dSBarry Smith 489416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 490da3a660dSBarry Smith { 491416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 492da3a660dSBarry Smith int ierr; 493da3a660dSBarry Smith 494da3a660dSBarry Smith /* do nondiagonal part */ 49538597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 496da3a660dSBarry Smith /* send it on its way */ 497537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 498da3a660dSBarry Smith /* do local part */ 49938597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 500da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 501da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 502da3a660dSBarry Smith /* but is not perhaps always true. */ 503416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 50464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 505da3a660dSBarry Smith return 0; 506da3a660dSBarry Smith } 507da3a660dSBarry Smith 5081eb62cbbSBarry Smith /* 5091eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5101eb62cbbSBarry Smith diagonal block 5111eb62cbbSBarry Smith */ 512416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5131eb62cbbSBarry Smith { 514416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 51544cd7ae7SLois Curfman McInnes if (a->M != a->N) 51644cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 5175baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 5185baf8537SBarry Smith SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 519416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5201eb62cbbSBarry Smith } 5211eb62cbbSBarry Smith 522052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 523052efed2SBarry Smith { 524052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 525052efed2SBarry Smith int ierr; 526052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 527052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 528052efed2SBarry Smith return 0; 529052efed2SBarry Smith } 530052efed2SBarry Smith 53144a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5321eb62cbbSBarry Smith { 5331eb62cbbSBarry Smith Mat mat = (Mat) obj; 53444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5351eb62cbbSBarry Smith int ierr; 536a5a9c739SBarry Smith #if defined(PETSC_LOG) 5376e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 538a5a9c739SBarry Smith #endif 5390452661fSBarry Smith PetscFree(aij->rowners); 54078b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 54178b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5420452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5430452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5441eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 545a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5467a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5470452661fSBarry Smith PetscFree(aij); 548a5a9c739SBarry Smith PLogObjectDestroy(mat); 5490452661fSBarry Smith PetscHeaderDestroy(mat); 5501eb62cbbSBarry Smith return 0; 5511eb62cbbSBarry Smith } 5527857610eSBarry Smith #include "draw.h" 553b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 554ee50ffe9SBarry Smith 555416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5561eb62cbbSBarry Smith { 557416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 558416022c9SBarry Smith int ierr; 559416022c9SBarry Smith 56017699dbbSLois Curfman McInnes if (aij->size == 1) { 561416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 562416022c9SBarry Smith } 563a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 564416022c9SBarry Smith return 0; 565416022c9SBarry Smith } 566416022c9SBarry Smith 567d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 568416022c9SBarry Smith { 56944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 570dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 571a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 572d636dbe3SBarry Smith FILE *fd; 57319bcc07fSBarry Smith ViewerType vtype; 574416022c9SBarry Smith 57519bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 57619bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 57790ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 57890ace30eSBarry Smith if (format == ASCII_FORMAT_INFO_DETAILED) { 579*4e220ebcSLois Curfman McInnes MatInfo info; 580*4e220ebcSLois Curfman McInnes int flg; 581a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 58290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 583*4e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 58495e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 58577c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 58695e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 587*4e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 58895e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 589*4e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 590*4e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 591*4e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 592*4e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 593*4e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 594a56f8943SBarry Smith fflush(fd); 59577c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 596a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 597416022c9SBarry Smith return 0; 598416022c9SBarry Smith } 59990ace30eSBarry Smith else if (format == ASCII_FORMAT_INFO) { 60008480c60SBarry Smith return 0; 60108480c60SBarry Smith } 602416022c9SBarry Smith } 603416022c9SBarry Smith 60419bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 60519bcc07fSBarry Smith Draw draw; 60619bcc07fSBarry Smith PetscTruth isnull; 60719bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 60819bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 60919bcc07fSBarry Smith } 61019bcc07fSBarry Smith 61119bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 61290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 61377c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 614d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 61517699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6161eb62cbbSBarry Smith aij->cend); 61778b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 61878b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 619d13ab20cSBarry Smith fflush(fd); 62077c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 621d13ab20cSBarry Smith } 622416022c9SBarry Smith else { 623a56f8943SBarry Smith int size = aij->size; 624a56f8943SBarry Smith rank = aij->rank; 62517699dbbSLois Curfman McInnes if (size == 1) { 62678b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 62795373324SBarry Smith } 62895373324SBarry Smith else { 62995373324SBarry Smith /* assemble the entire matrix onto first processor. */ 63095373324SBarry Smith Mat A; 631ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6322eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 63395373324SBarry Smith Scalar *a; 6342ee70a88SLois Curfman McInnes 63517699dbbSLois Curfman McInnes if (!rank) { 636b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 637c451ab25SLois Curfman McInnes CHKERRQ(ierr); 63895373324SBarry Smith } 63995373324SBarry Smith else { 640b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 641c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64295373324SBarry Smith } 643464493b3SBarry Smith PLogObjectParent(mat,A); 644416022c9SBarry Smith 64595373324SBarry Smith /* copy over the A part */ 646ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6472ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 64895373324SBarry Smith row = aij->rstart; 649dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 65095373324SBarry Smith for ( i=0; i<m; i++ ) { 651416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 65295373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 65395373324SBarry Smith } 6542ee70a88SLois Curfman McInnes aj = Aloc->j; 655dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 65695373324SBarry Smith 65795373324SBarry Smith /* copy over the B part */ 658ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6592ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66095373324SBarry Smith row = aij->rstart; 6610452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 662dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 66395373324SBarry Smith for ( i=0; i<m; i++ ) { 664416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 66595373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 66695373324SBarry Smith } 6670452661fSBarry Smith PetscFree(ct); 6686d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6696d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 67017699dbbSLois Curfman McInnes if (!rank) { 67178b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 67295373324SBarry Smith } 67378b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 67495373324SBarry Smith } 67595373324SBarry Smith } 6761eb62cbbSBarry Smith return 0; 6771eb62cbbSBarry Smith } 6781eb62cbbSBarry Smith 679416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 680416022c9SBarry Smith { 681416022c9SBarry Smith Mat mat = (Mat) obj; 682416022c9SBarry Smith int ierr; 68319bcc07fSBarry Smith ViewerType vtype; 684416022c9SBarry Smith 68519bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 68619bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 68719bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 688d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 689416022c9SBarry Smith } 69019bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 691416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 692416022c9SBarry Smith } 693416022c9SBarry Smith return 0; 694416022c9SBarry Smith } 695416022c9SBarry Smith 6961eb62cbbSBarry Smith /* 6971eb62cbbSBarry Smith This has to provide several versions. 6981eb62cbbSBarry Smith 6991eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7001eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 701d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7021eb62cbbSBarry Smith */ 703fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 704dbb450caSBarry Smith double fshift,int its,Vec xx) 7058a729477SBarry Smith { 70644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 707d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 708ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 709c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7106abc6512SBarry Smith int ierr,*idx, *diag; 711416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7128a729477SBarry Smith 713d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 714dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 715dbb450caSBarry Smith ls = ls + shift; 716ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 717d6dfbf8fSBarry Smith diag = A->diag; 718c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 719da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 72038597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 721da3a660dSBarry Smith } 72264119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72378b31e54SBarry Smith CHKERRQ(ierr); 72464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72578b31e54SBarry Smith CHKERRQ(ierr); 726d6dfbf8fSBarry Smith while (its--) { 727d6dfbf8fSBarry Smith /* go down through the rows */ 728d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 729d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 730dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 731dbb450caSBarry Smith v = A->a + A->i[i] + shift; 732d6dfbf8fSBarry Smith sum = b[i]; 733d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 734dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 735d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 736dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 737dbb450caSBarry Smith v = B->a + B->i[i] + shift; 738d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 739d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 740d6dfbf8fSBarry Smith } 741d6dfbf8fSBarry Smith /* come up through the rows */ 742d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 743d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 744dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 745dbb450caSBarry Smith v = A->a + A->i[i] + shift; 746d6dfbf8fSBarry Smith sum = b[i]; 747d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 748dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 749d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 750dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 751dbb450caSBarry Smith v = B->a + B->i[i] + shift; 752d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 753d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 754d6dfbf8fSBarry Smith } 755d6dfbf8fSBarry Smith } 756d6dfbf8fSBarry Smith } 757d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 758da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 75938597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 760da3a660dSBarry Smith } 76164119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76278b31e54SBarry Smith CHKERRQ(ierr); 76364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76478b31e54SBarry Smith CHKERRQ(ierr); 765d6dfbf8fSBarry Smith while (its--) { 766d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 767d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 768dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 769dbb450caSBarry Smith v = A->a + A->i[i] + shift; 770d6dfbf8fSBarry Smith sum = b[i]; 771d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 772dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 773d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 774dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 775dbb450caSBarry Smith v = B->a + B->i[i] + shift; 776d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 777d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 778d6dfbf8fSBarry Smith } 779d6dfbf8fSBarry Smith } 780d6dfbf8fSBarry Smith } 781d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 782da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 78338597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 784da3a660dSBarry Smith } 78564119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 78678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 78764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 78878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 789d6dfbf8fSBarry Smith while (its--) { 790d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 791d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 792dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 793dbb450caSBarry Smith v = A->a + A->i[i] + shift; 794d6dfbf8fSBarry Smith sum = b[i]; 795d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 796dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 797d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 798dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 799dbb450caSBarry Smith v = B->a + B->i[i] + shift; 800d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 801d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 802d6dfbf8fSBarry Smith } 803d6dfbf8fSBarry Smith } 804d6dfbf8fSBarry Smith } 805c16cb8f2SBarry Smith else { 806c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 807c16cb8f2SBarry Smith } 8088a729477SBarry Smith return 0; 8098a729477SBarry Smith } 810a66be287SLois Curfman McInnes 811*4e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 812a66be287SLois Curfman McInnes { 813a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 814a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 815*4e220ebcSLois Curfman McInnes int ierr; 816*4e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 817a66be287SLois Curfman McInnes 818*4e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 819*4e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 820*4e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 821*4e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 822*4e220ebcSLois Curfman McInnes info->block_size = 1.0; 823*4e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 824*4e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 825*4e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 826*4e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 827*4e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 828*4e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 829a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 830*4e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 831*4e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 832*4e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 833*4e220ebcSLois Curfman McInnes info->memory = isend[3]; 834*4e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 835a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 836*4e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 837*4e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 838*4e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 839*4e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 840*4e220ebcSLois Curfman McInnes info->memory = irecv[3]; 841*4e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 842a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 843*4e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 844*4e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 845*4e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 846*4e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 847*4e220ebcSLois Curfman McInnes info->memory = irecv[3]; 848*4e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 849a66be287SLois Curfman McInnes } 850*4e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 851*4e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 852*4e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 853*4e220ebcSLois Curfman McInnes 854a66be287SLois Curfman McInnes return 0; 855a66be287SLois Curfman McInnes } 856a66be287SLois Curfman McInnes 857299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 858299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 859299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 860299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 861299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 862299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 863299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 864299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 865299609e3SLois Curfman McInnes 866416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 867c74985f6SBarry Smith { 868c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 869c74985f6SBarry Smith 8706d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 8716d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 8726d4a8577SBarry Smith op == MAT_COLUMNS_SORTED || 8736d4a8577SBarry Smith op == MAT_ROW_ORIENTED) { 874c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 875c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 876c74985f6SBarry Smith } 8776d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 8786d4a8577SBarry Smith op == MAT_SYMMETRIC || 8796d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 8806d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 88194a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 8826d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 8834b0e389bSBarry Smith a->roworiented = 0; 8844b0e389bSBarry Smith MatSetOption(a->A,op); 8854b0e389bSBarry Smith MatSetOption(a->B,op); 8864b0e389bSBarry Smith } 8876d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 8886d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 889c0bbcb79SLois Curfman McInnes else 8904d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 891c74985f6SBarry Smith return 0; 892c74985f6SBarry Smith } 893c74985f6SBarry Smith 894d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 895c74985f6SBarry Smith { 89644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 897c74985f6SBarry Smith *m = mat->M; *n = mat->N; 898c74985f6SBarry Smith return 0; 899c74985f6SBarry Smith } 900c74985f6SBarry Smith 901d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 902c74985f6SBarry Smith { 90344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 904b7c46309SBarry Smith *m = mat->m; *n = mat->N; 905c74985f6SBarry Smith return 0; 906c74985f6SBarry Smith } 907c74985f6SBarry Smith 908d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 909c74985f6SBarry Smith { 91044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 911c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 912c74985f6SBarry Smith return 0; 913c74985f6SBarry Smith } 914c74985f6SBarry Smith 9156d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9166d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9176d84be18SBarry Smith 9186d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 91939e00950SLois Curfman McInnes { 920154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 92170f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 922154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 923154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 92470f0671dSBarry Smith int *cmap, *idx_p; 92539e00950SLois Curfman McInnes 9267a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 9277a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 9287a0afa10SBarry Smith 92970f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 9307a0afa10SBarry Smith /* 9317a0afa10SBarry Smith allocate enough space to hold information from the longest row. 9327a0afa10SBarry Smith */ 9337a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 934c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 935c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 9367a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 9377a0afa10SBarry Smith if (max < tmp) { max = tmp; } 9387a0afa10SBarry Smith } 9397a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 9407a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 9417a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 9427a0afa10SBarry Smith } 9437a0afa10SBarry Smith 944b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 945abc0e9e4SLois Curfman McInnes lrow = row - rstart; 94639e00950SLois Curfman McInnes 947154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 948154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 949154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 95038597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 95138597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 952154123eaSLois Curfman McInnes nztot = nzA + nzB; 953154123eaSLois Curfman McInnes 95470f0671dSBarry Smith cmap = mat->garray; 955154123eaSLois Curfman McInnes if (v || idx) { 956154123eaSLois Curfman McInnes if (nztot) { 957154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 95870f0671dSBarry Smith int imark = -1; 959154123eaSLois Curfman McInnes if (v) { 96070f0671dSBarry Smith *v = v_p = mat->rowvalues; 96139e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 96270f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 963154123eaSLois Curfman McInnes else break; 964154123eaSLois Curfman McInnes } 965154123eaSLois Curfman McInnes imark = i; 96670f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 96770f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 968154123eaSLois Curfman McInnes } 969154123eaSLois Curfman McInnes if (idx) { 97070f0671dSBarry Smith *idx = idx_p = mat->rowindices; 97170f0671dSBarry Smith if (imark > -1) { 97270f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 97370f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 97470f0671dSBarry Smith } 97570f0671dSBarry Smith } else { 976154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 97770f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 978154123eaSLois Curfman McInnes else break; 979154123eaSLois Curfman McInnes } 980154123eaSLois Curfman McInnes imark = i; 98170f0671dSBarry Smith } 98270f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 98370f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 98439e00950SLois Curfman McInnes } 98539e00950SLois Curfman McInnes } 9861ca473b0SSatish Balay else { 9871ca473b0SSatish Balay if (idx) *idx = 0; 9881ca473b0SSatish Balay if (v) *v = 0; 9891ca473b0SSatish Balay } 990154123eaSLois Curfman McInnes } 99139e00950SLois Curfman McInnes *nz = nztot; 99238597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 99338597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 99439e00950SLois Curfman McInnes return 0; 99539e00950SLois Curfman McInnes } 99639e00950SLois Curfman McInnes 9976d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 99839e00950SLois Curfman McInnes { 9997a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 10007a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 10017a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 10027a0afa10SBarry Smith } 10037a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 100439e00950SLois Curfman McInnes return 0; 100539e00950SLois Curfman McInnes } 100639e00950SLois Curfman McInnes 1007cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1008855ac2c5SLois Curfman McInnes { 1009855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1010ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1011416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1012416022c9SBarry Smith double sum = 0.0; 101304ca555eSLois Curfman McInnes Scalar *v; 101404ca555eSLois Curfman McInnes 101517699dbbSLois Curfman McInnes if (aij->size == 1) { 101614183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 101737fa93a5SLois Curfman McInnes } else { 101804ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 101904ca555eSLois Curfman McInnes v = amat->a; 102004ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 102104ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 102204ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 102304ca555eSLois Curfman McInnes #else 102404ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 102504ca555eSLois Curfman McInnes #endif 102604ca555eSLois Curfman McInnes } 102704ca555eSLois Curfman McInnes v = bmat->a; 102804ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 102904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 103004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 103104ca555eSLois Curfman McInnes #else 103204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 103304ca555eSLois Curfman McInnes #endif 103404ca555eSLois Curfman McInnes } 10356d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 103604ca555eSLois Curfman McInnes *norm = sqrt(*norm); 103704ca555eSLois Curfman McInnes } 103804ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 103904ca555eSLois Curfman McInnes double *tmp, *tmp2; 104004ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 10410452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 10420452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1043cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 104404ca555eSLois Curfman McInnes *norm = 0.0; 104504ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 104604ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1047579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 104804ca555eSLois Curfman McInnes } 104904ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 105004ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1051579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 105204ca555eSLois Curfman McInnes } 10536d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 105404ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 105504ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 105604ca555eSLois Curfman McInnes } 10570452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 105804ca555eSLois Curfman McInnes } 105904ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1060515d9167SLois Curfman McInnes double ntemp = 0.0; 106104ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1062dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 106304ca555eSLois Curfman McInnes sum = 0.0; 106404ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1065cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 106604ca555eSLois Curfman McInnes } 1067dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 106804ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1069cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 107004ca555eSLois Curfman McInnes } 1071515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 107204ca555eSLois Curfman McInnes } 10736d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 107404ca555eSLois Curfman McInnes } 107504ca555eSLois Curfman McInnes else { 1076515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 107704ca555eSLois Curfman McInnes } 107837fa93a5SLois Curfman McInnes } 1079855ac2c5SLois Curfman McInnes return 0; 1080855ac2c5SLois Curfman McInnes } 1081855ac2c5SLois Curfman McInnes 10820de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1083b7c46309SBarry Smith { 1084b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1085dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1086416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1087416022c9SBarry Smith Mat B; 1088b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1089b7c46309SBarry Smith Scalar *array; 1090b7c46309SBarry Smith 10913638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 10923638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1093b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1094b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1095b7c46309SBarry Smith 1096b7c46309SBarry Smith /* copy over the A part */ 1097ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1098b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1099b7c46309SBarry Smith row = a->rstart; 1100dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1101b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1102416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1103b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1104b7c46309SBarry Smith } 1105b7c46309SBarry Smith aj = Aloc->j; 11064af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1107b7c46309SBarry Smith 1108b7c46309SBarry Smith /* copy over the B part */ 1109ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1110b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1111b7c46309SBarry Smith row = a->rstart; 11120452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1113dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1114b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1115416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1116b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1117b7c46309SBarry Smith } 11180452661fSBarry Smith PetscFree(ct); 11196d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11206d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11213638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 11220de55854SLois Curfman McInnes *matout = B; 11230de55854SLois Curfman McInnes } else { 11240de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 11250452661fSBarry Smith PetscFree(a->rowners); 11260de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 11270de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 11280452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 11290452661fSBarry Smith if (a->garray) PetscFree(a->garray); 11300de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1131a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 11320452661fSBarry Smith PetscFree(a); 1133416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 11340452661fSBarry Smith PetscHeaderDestroy(B); 11350de55854SLois Curfman McInnes } 1136b7c46309SBarry Smith return 0; 1137b7c46309SBarry Smith } 1138b7c46309SBarry Smith 1139682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1140682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1141682d7d0cSBarry Smith { 1142682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1143682d7d0cSBarry Smith 1144682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1145682d7d0cSBarry Smith else return 0; 1146682d7d0cSBarry Smith } 1147682d7d0cSBarry Smith 11485a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 11495a838052SSatish Balay { 11505a838052SSatish Balay *bs = 1; 11515a838052SSatish Balay return 0; 11525a838052SSatish Balay } 11535a838052SSatish Balay 1154fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 11553d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 11562f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1157598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 11588a729477SBarry Smith /* -------------------------------------------------------------------*/ 11592ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 116039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 116144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 116244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1163299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1164299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1165299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 116644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1167b7c46309SBarry Smith MatTranspose_MPIAIJ, 1168a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1169855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1170ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11711eb62cbbSBarry Smith 0, 1172299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1173299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1174299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1175d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1176299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 11773e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1178b49de8d1SLois Curfman McInnes 0,0,0, 1179598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1180052efed2SBarry Smith MatPrintHelp_MPIAIJ, 11815a838052SSatish Balay MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ}; 11828a729477SBarry Smith 11831987afe7SBarry Smith /*@C 1184ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 11853a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 11863a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 11873a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 11883a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 11898a729477SBarry Smith 11908a729477SBarry Smith Input Parameters: 11911eb62cbbSBarry Smith . comm - MPI communicator 11927d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 119392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 119492e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 119592e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 119692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 119792e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 11987d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 119992e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1200ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1201ff756334SLois Curfman McInnes (same for all local rows) 12022bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 120392e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 12042bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 12052bd5e0b2SLois Curfman McInnes it is zero. 12062bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1207ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 12082bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 12092bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 12102bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 12118a729477SBarry Smith 1212ff756334SLois Curfman McInnes Output Parameter: 121344cd7ae7SLois Curfman McInnes . A - the matrix 12148a729477SBarry Smith 1215ff756334SLois Curfman McInnes Notes: 1216ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1217ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12180002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12190002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1220ff756334SLois Curfman McInnes 1221ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1222ff756334SLois Curfman McInnes (possibly both). 1223ff756334SLois Curfman McInnes 12245511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 12255511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 12265511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 12275511cfe3SLois Curfman McInnes 12285511cfe3SLois Curfman McInnes Options Database Keys: 12295511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 12306e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 12316e62573dSLois Curfman McInnes $ (max limit=5) 12326e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 12336e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 12346e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 12355511cfe3SLois Curfman McInnes 1236e0245417SLois Curfman McInnes Storage Information: 1237e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1238e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1239e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1240e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1241e0245417SLois Curfman McInnes 1242e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 12435ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 12445ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 12455ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 12465ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1247ff756334SLois Curfman McInnes 12485511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 12495511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 12502191d07cSBarry Smith 1251b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1252b810aeb4SBarry Smith $ ------------------- 12535511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 12545511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 12555511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 12565511cfe3SLois Curfman McInnes $ ------------------- 1257b810aeb4SBarry Smith $ 1258b810aeb4SBarry Smith 12595511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 12605511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 12615511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 12625511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 12635511cfe3SLois Curfman McInnes 12645511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 12655511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 12665511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 12673d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 126892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 12693d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 12703a511b96SLois Curfman McInnes 1271dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1272ff756334SLois Curfman McInnes 1273fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 12748a729477SBarry Smith @*/ 12751eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 127644cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 12778a729477SBarry Smith { 127844cd7ae7SLois Curfman McInnes Mat B; 127944cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 12806abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1281416022c9SBarry Smith 128244cd7ae7SLois Curfman McInnes *A = 0; 128344cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 128444cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 128544cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 128644cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 128744cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 128844cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 128944cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 129044cd7ae7SLois Curfman McInnes B->factor = 0; 129144cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 1292d6dfbf8fSBarry Smith 129344cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 129444cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 129544cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 12961eb62cbbSBarry Smith 1297b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 12982e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 12991987afe7SBarry Smith 1300dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13011eb62cbbSBarry Smith work[0] = m; work[1] = n; 1302d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1303dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1304dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13051eb62cbbSBarry Smith } 130644cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 130744cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 130844cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 130944cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 131044cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 131144cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 13121eb62cbbSBarry Smith 13131eb62cbbSBarry Smith /* build local table of row and column ownerships */ 131444cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 131544cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1316603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 131744cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 131844cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 131944cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 132044cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 13218a729477SBarry Smith } 132244cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 132344cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 132444cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 132544cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 132644cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 132744cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 13288a729477SBarry Smith } 132944cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 133044cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 13318a729477SBarry Smith 13325ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 133344cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 133444cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 13357b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 133644cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 133744cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 13388a729477SBarry Smith 13391eb62cbbSBarry Smith /* build cache for off array entries formed */ 134044cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 134144cd7ae7SLois Curfman McInnes b->colmap = 0; 134244cd7ae7SLois Curfman McInnes b->garray = 0; 134344cd7ae7SLois Curfman McInnes b->roworiented = 1; 13448a729477SBarry Smith 13451eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 134644cd7ae7SLois Curfman McInnes b->lvec = 0; 134744cd7ae7SLois Curfman McInnes b->Mvctx = 0; 13488a729477SBarry Smith 13497a0afa10SBarry Smith /* stuff for MatGetRow() */ 135044cd7ae7SLois Curfman McInnes b->rowindices = 0; 135144cd7ae7SLois Curfman McInnes b->rowvalues = 0; 135244cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 13537a0afa10SBarry Smith 135444cd7ae7SLois Curfman McInnes *A = B; 1355d6dfbf8fSBarry Smith return 0; 1356d6dfbf8fSBarry Smith } 1357c74985f6SBarry Smith 13583d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1359d6dfbf8fSBarry Smith { 1360d6dfbf8fSBarry Smith Mat mat; 1361416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1362a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1363d6dfbf8fSBarry Smith 1364416022c9SBarry Smith *newmat = 0; 13650452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1366a5a9c739SBarry Smith PLogObjectCreate(mat); 13670452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1368416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 136944a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 137044a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1371d6dfbf8fSBarry Smith mat->factor = matin->factor; 1372c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1373d6dfbf8fSBarry Smith 137444cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 137544cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 137644cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 137744cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1378d6dfbf8fSBarry Smith 1379416022c9SBarry Smith a->rstart = oldmat->rstart; 1380416022c9SBarry Smith a->rend = oldmat->rend; 1381416022c9SBarry Smith a->cstart = oldmat->cstart; 1382416022c9SBarry Smith a->cend = oldmat->cend; 138317699dbbSLois Curfman McInnes a->size = oldmat->size; 138417699dbbSLois Curfman McInnes a->rank = oldmat->rank; 138564119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1386bcd2baecSBarry Smith a->rowvalues = 0; 1387bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1388d6dfbf8fSBarry Smith 1389603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1390603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1391603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1392603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1393416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 13942ee70a88SLois Curfman McInnes if (oldmat->colmap) { 13950452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1396416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1397416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1398416022c9SBarry Smith } else a->colmap = 0; 13993f41c07dSBarry Smith if (oldmat->garray) { 14003f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 14013f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1402464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 14033f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1404416022c9SBarry Smith } else a->garray = 0; 1405d6dfbf8fSBarry Smith 1406416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1407416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1408a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1409416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1410416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1411416022c9SBarry Smith PLogObjectParent(mat,a->A); 1412416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1413416022c9SBarry Smith PLogObjectParent(mat,a->B); 14145dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 141525cdf11fSBarry Smith if (flg) { 1416682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1417682d7d0cSBarry Smith } 14188a729477SBarry Smith *newmat = mat; 14198a729477SBarry Smith return 0; 14208a729477SBarry Smith } 1421416022c9SBarry Smith 142277c4ece6SBarry Smith #include "sys.h" 1423416022c9SBarry Smith 142419bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1425416022c9SBarry Smith { 1426d65a2f8fSBarry Smith Mat A; 1427416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1428d65a2f8fSBarry Smith Scalar *vals,*svals; 142919bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1430416022c9SBarry Smith MPI_Status status; 143117699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1432d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 143319bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1434416022c9SBarry Smith 143517699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 143617699dbbSLois Curfman McInnes if (!rank) { 143790ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 143877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1439c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1440416022c9SBarry Smith } 1441416022c9SBarry Smith 1442416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1443416022c9SBarry Smith M = header[1]; N = header[2]; 1444416022c9SBarry Smith /* determine ownership of all rows */ 144517699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 14460452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1447416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1448416022c9SBarry Smith rowners[0] = 0; 144917699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1450416022c9SBarry Smith rowners[i] += rowners[i-1]; 1451416022c9SBarry Smith } 145217699dbbSLois Curfman McInnes rstart = rowners[rank]; 145317699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1454416022c9SBarry Smith 1455416022c9SBarry Smith /* distribute row lengths to all processors */ 14560452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1457416022c9SBarry Smith offlens = ourlens + (rend-rstart); 145817699dbbSLois Curfman McInnes if (!rank) { 14590452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 146077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 14610452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 146217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1463d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 14640452661fSBarry Smith PetscFree(sndcounts); 1465416022c9SBarry Smith } 1466416022c9SBarry Smith else { 1467416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1468416022c9SBarry Smith } 1469416022c9SBarry Smith 147017699dbbSLois Curfman McInnes if (!rank) { 1471416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 14720452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1473cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 147417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1475416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1476416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1477416022c9SBarry Smith } 1478416022c9SBarry Smith } 14790452661fSBarry Smith PetscFree(rowlengths); 1480416022c9SBarry Smith 1481416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1482416022c9SBarry Smith maxnz = 0; 148317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 14840452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1485416022c9SBarry Smith } 14860452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1487416022c9SBarry Smith 1488416022c9SBarry Smith /* read in my part of the matrix column indices */ 1489416022c9SBarry Smith nz = procsnz[0]; 14900452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 149177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1492d65a2f8fSBarry Smith 1493d65a2f8fSBarry Smith /* read in every one elses and ship off */ 149417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1495d65a2f8fSBarry Smith nz = procsnz[i]; 149677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1497d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1498d65a2f8fSBarry Smith } 14990452661fSBarry Smith PetscFree(cols); 1500416022c9SBarry Smith } 1501416022c9SBarry Smith else { 1502416022c9SBarry Smith /* determine buffer space needed for message */ 1503416022c9SBarry Smith nz = 0; 1504416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1505416022c9SBarry Smith nz += ourlens[i]; 1506416022c9SBarry Smith } 15070452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1508416022c9SBarry Smith 1509416022c9SBarry Smith /* receive message of column indices*/ 1510d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1511416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1512c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1513416022c9SBarry Smith } 1514416022c9SBarry Smith 1515416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1516cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1517416022c9SBarry Smith jj = 0; 1518416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1519416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1520d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1521416022c9SBarry Smith jj++; 1522416022c9SBarry Smith } 1523416022c9SBarry Smith } 1524d65a2f8fSBarry Smith 1525d65a2f8fSBarry Smith /* create our matrix */ 1526416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1527416022c9SBarry Smith ourlens[i] -= offlens[i]; 1528416022c9SBarry Smith } 1529d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1530d65a2f8fSBarry Smith A = *newmat; 15316d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1532d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1533d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1534d65a2f8fSBarry Smith } 1535416022c9SBarry Smith 153617699dbbSLois Curfman McInnes if (!rank) { 15370452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1538416022c9SBarry Smith 1539416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1540416022c9SBarry Smith nz = procsnz[0]; 154177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1542d65a2f8fSBarry Smith 1543d65a2f8fSBarry Smith /* insert into matrix */ 1544d65a2f8fSBarry Smith jj = rstart; 1545d65a2f8fSBarry Smith smycols = mycols; 1546d65a2f8fSBarry Smith svals = vals; 1547d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1548d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1549d65a2f8fSBarry Smith smycols += ourlens[i]; 1550d65a2f8fSBarry Smith svals += ourlens[i]; 1551d65a2f8fSBarry Smith jj++; 1552416022c9SBarry Smith } 1553416022c9SBarry Smith 1554d65a2f8fSBarry Smith /* read in other processors and ship out */ 155517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1556416022c9SBarry Smith nz = procsnz[i]; 155777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1558d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1559416022c9SBarry Smith } 15600452661fSBarry Smith PetscFree(procsnz); 1561416022c9SBarry Smith } 1562d65a2f8fSBarry Smith else { 1563d65a2f8fSBarry Smith /* receive numeric values */ 15640452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1565416022c9SBarry Smith 1566d65a2f8fSBarry Smith /* receive message of values*/ 1567d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1568d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1569c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1570d65a2f8fSBarry Smith 1571d65a2f8fSBarry Smith /* insert into matrix */ 1572d65a2f8fSBarry Smith jj = rstart; 1573d65a2f8fSBarry Smith smycols = mycols; 1574d65a2f8fSBarry Smith svals = vals; 1575d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1576d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1577d65a2f8fSBarry Smith smycols += ourlens[i]; 1578d65a2f8fSBarry Smith svals += ourlens[i]; 1579d65a2f8fSBarry Smith jj++; 1580d65a2f8fSBarry Smith } 1581d65a2f8fSBarry Smith } 15820452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1583d65a2f8fSBarry Smith 15846d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15856d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1586416022c9SBarry Smith return 0; 1587416022c9SBarry Smith } 1588