1905e6a2fSBarry Smith 2cb512458SBarry Smith #ifndef lint 3*537820f0SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.162 1996/08/14 15:08:16 bsmith Exp bsmith $"; 4cb512458SBarry Smith #endif 58a729477SBarry Smith 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 15905e6a2fSBarry Smith static int CreateColmap_MPIAIJ_Private(Mat mat) 169e25ed09SBarry Smith { 1744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 18ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 19905e6a2fSBarry Smith int n = B->n,i; 20dbb450caSBarry Smith 210452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 22464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 23cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 24905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 259e25ed09SBarry Smith return 0; 269e25ed09SBarry Smith } 279e25ed09SBarry Smith 282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 292493cbb0SBarry Smith 3070423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 3417699dbbSLois Curfman McInnes if (aij->size == 1) { 3551c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3648d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 37299609e3SLois Curfman McInnes return 0; 38299609e3SLois Curfman McInnes } 39299609e3SLois Curfman McInnes 404b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46905e6a2fSBarry Smith int roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67905e6a2fSBarry Smith if (!aij->colmap) { 68905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 69905e6a2fSBarry Smith } 70905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 71ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 722493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 734b0e389bSBarry Smith col = in[j]; 74d6dfbf8fSBarry Smith } 759e25ed09SBarry Smith } 764b0e389bSBarry Smith else col = in[j]; 774b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 784b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith } 811eb62cbbSBarry Smith } 821eb62cbbSBarry Smith else { 834b0e389bSBarry Smith if (roworiented) { 844b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 854b0e389bSBarry Smith } 864b0e389bSBarry Smith else { 874b0e389bSBarry Smith row = im[i]; 884b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 894b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 904b0e389bSBarry Smith } 914b0e389bSBarry Smith } 921eb62cbbSBarry Smith } 938a729477SBarry Smith } 948a729477SBarry Smith return 0; 958a729477SBarry Smith } 968a729477SBarry Smith 97b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 98b49de8d1SLois Curfman McInnes { 99b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 100b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 101b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116905e6a2fSBarry Smith if (!aij->colmap) { 117905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 118905e6a2fSBarry Smith } 119905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 120e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 121d9d09a02SSatish Balay else { 122b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes } 126d9d09a02SSatish Balay } 127b49de8d1SLois Curfman McInnes else { 128b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 129b49de8d1SLois Curfman McInnes } 130b49de8d1SLois Curfman McInnes } 131b49de8d1SLois Curfman McInnes return 0; 132b49de8d1SLois Curfman McInnes } 133b49de8d1SLois Curfman McInnes 134fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1358a729477SBarry Smith { 13644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 137d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13817699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13917699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1401eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1416abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1421eb62cbbSBarry Smith InsertMode addv; 1431eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1441eb62cbbSBarry Smith 1451eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 146d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 147dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 148bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1511eb62cbbSBarry Smith 1521eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1530452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 154cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1550452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1561eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1571eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1591eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1601eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1618a729477SBarry Smith } 1628a729477SBarry Smith } 1638a729477SBarry Smith } 16417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1651eb62cbbSBarry Smith 1661eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1670452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16817699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16917699dbbSLois Curfman McInnes nreceives = work[rank]; 17017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 17117699dbbSLois Curfman McInnes nmax = work[rank]; 1720452661fSBarry Smith PetscFree(work); 1731eb62cbbSBarry Smith 1741eb62cbbSBarry Smith /* post receives: 1751eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1761eb62cbbSBarry Smith (global index,value) we store the global index as a double 177d6dfbf8fSBarry Smith to simplify the message passing. 1781eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1791eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1801eb62cbbSBarry Smith this is a lot of wasted space. 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith 1831eb62cbbSBarry Smith This could be done better. 1841eb62cbbSBarry Smith */ 1850452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18678b31e54SBarry Smith CHKPTRQ(rvalues); 1870452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18878b31e54SBarry Smith CHKPTRQ(recv_waits); 1891eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 190416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1911eb62cbbSBarry Smith comm,recv_waits+i); 1921eb62cbbSBarry Smith } 1931eb62cbbSBarry Smith 1941eb62cbbSBarry Smith /* do sends: 1951eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1961eb62cbbSBarry Smith the ith processor 1971eb62cbbSBarry Smith */ 1980452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1990452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 20078b31e54SBarry Smith CHKPTRQ(send_waits); 2010452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2021eb62cbbSBarry Smith starts[0] = 0; 20317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2041eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2051eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2061eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2071eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2081eb62cbbSBarry Smith } 2090452661fSBarry Smith PetscFree(owner); 2101eb62cbbSBarry Smith starts[0] = 0; 21117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2121eb62cbbSBarry Smith count = 0; 21317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2141eb62cbbSBarry Smith if (procs[i]) { 215416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2161eb62cbbSBarry Smith comm,send_waits+count++); 2171eb62cbbSBarry Smith } 2181eb62cbbSBarry Smith } 2190452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2201eb62cbbSBarry Smith 2211eb62cbbSBarry Smith /* Free cache space */ 2222191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 22378b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2241eb62cbbSBarry Smith 2251eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2261eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2271eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2281eb62cbbSBarry Smith aij->rmax = nmax; 2291eb62cbbSBarry Smith 2301eb62cbbSBarry Smith return 0; 2311eb62cbbSBarry Smith } 23244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2331eb62cbbSBarry Smith 234fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2351eb62cbbSBarry Smith { 23644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2371eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 238416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 239905e6a2fSBarry Smith int row,col,other_disassembled; 2401eb62cbbSBarry Smith Scalar *values,val; 2411eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2421eb62cbbSBarry Smith 2431eb62cbbSBarry Smith /* wait on receives */ 2441eb62cbbSBarry Smith while (count) { 245d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2461eb62cbbSBarry Smith /* unpack receives into our local space */ 247d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 248c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2491eb62cbbSBarry Smith n = n/3; 2501eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 251227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 252227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2531eb62cbbSBarry Smith val = values[3*i+2]; 2541eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2551eb62cbbSBarry Smith col -= aij->cstart; 2561eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2571eb62cbbSBarry Smith } 2581eb62cbbSBarry Smith else { 259227d817aSBarry Smith if (mat->was_assembled) { 260905e6a2fSBarry Smith if (!aij->colmap) { 261905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262905e6a2fSBarry Smith } 263905e6a2fSBarry Smith col = aij->colmap[col] - 1; 264ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2652493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 266227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 267d6dfbf8fSBarry Smith } 2689e25ed09SBarry Smith } 2691eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith } 2721eb62cbbSBarry Smith count--; 2731eb62cbbSBarry Smith } 2740452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* wait on sends */ 2771eb62cbbSBarry Smith if (aij->nsends) { 2780452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27978b31e54SBarry Smith CHKPTRQ(send_status); 2801eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2810452661fSBarry Smith PetscFree(send_status); 2821eb62cbbSBarry Smith } 2830452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2841eb62cbbSBarry Smith 28564119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 28678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2881eb62cbbSBarry Smith 2892493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2902493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 291227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 292227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2932493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2942493cbb0SBarry Smith } 2952493cbb0SBarry Smith 2966d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 29778b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2985e42470aSBarry Smith } 29978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 30078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3015e42470aSBarry Smith 3027a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3038a729477SBarry Smith return 0; 3048a729477SBarry Smith } 3058a729477SBarry Smith 3062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3071eb62cbbSBarry Smith { 30844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 309dbd7a890SLois Curfman McInnes int ierr; 31078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 31178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3121eb62cbbSBarry Smith return 0; 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith 3151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3161eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3171eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3181eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3191eb62cbbSBarry Smith routine. 3201eb62cbbSBarry Smith */ 32144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3221eb62cbbSBarry Smith { 32344a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 32417699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3256abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 32617699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3275392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 328d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 329d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3301eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3311eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3321eb62cbbSBarry Smith IS istmp; 3331eb62cbbSBarry Smith 33477c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 33578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3380452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 339cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3400452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3411eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3421eb62cbbSBarry Smith idx = rows[i]; 3431eb62cbbSBarry Smith found = 0; 34417699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3451eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3461eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3471eb62cbbSBarry Smith } 3481eb62cbbSBarry Smith } 349bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3501eb62cbbSBarry Smith } 35117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3540452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 35517699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 35617699dbbSLois Curfman McInnes nrecvs = work[rank]; 35717699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35817699dbbSLois Curfman McInnes nmax = work[rank]; 3590452661fSBarry Smith PetscFree(work); 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith /* post receives: */ 3620452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 36378b31e54SBarry Smith CHKPTRQ(rvalues); 3640452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 36578b31e54SBarry Smith CHKPTRQ(recv_waits); 3661eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 367416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3681eb62cbbSBarry Smith } 3691eb62cbbSBarry Smith 3701eb62cbbSBarry Smith /* do sends: 3711eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3721eb62cbbSBarry Smith the ith processor 3731eb62cbbSBarry Smith */ 3740452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3750452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 37678b31e54SBarry Smith CHKPTRQ(send_waits); 3770452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3781eb62cbbSBarry Smith starts[0] = 0; 37917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3801eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3811eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3841eb62cbbSBarry Smith 3851eb62cbbSBarry Smith starts[0] = 0; 38617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3871eb62cbbSBarry Smith count = 0; 38817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3891eb62cbbSBarry Smith if (procs[i]) { 390416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3911eb62cbbSBarry Smith } 3921eb62cbbSBarry Smith } 3930452661fSBarry Smith PetscFree(starts); 3941eb62cbbSBarry Smith 39517699dbbSLois Curfman McInnes base = owners[rank]; 3961eb62cbbSBarry Smith 3971eb62cbbSBarry Smith /* wait on receives */ 3980452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3991eb62cbbSBarry Smith source = lens + nrecvs; 4001eb62cbbSBarry Smith count = nrecvs; slen = 0; 4011eb62cbbSBarry Smith while (count) { 402d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4031eb62cbbSBarry Smith /* unpack receives into our local space */ 4041eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 405d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 406d6dfbf8fSBarry Smith lens[imdex] = n; 4071eb62cbbSBarry Smith slen += n; 4081eb62cbbSBarry Smith count--; 4091eb62cbbSBarry Smith } 4100452661fSBarry Smith PetscFree(recv_waits); 4111eb62cbbSBarry Smith 4121eb62cbbSBarry Smith /* move the data into the send scatter */ 4130452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4141eb62cbbSBarry Smith count = 0; 4151eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4161eb62cbbSBarry Smith values = rvalues + i*nmax; 4171eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4181eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4191eb62cbbSBarry Smith } 4201eb62cbbSBarry Smith } 4210452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4220452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* actually zap the local rows */ 425*537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 426464493b3SBarry Smith PLogObjectParent(A,istmp); 4270452661fSBarry Smith PetscFree(lrows); 42878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4311eb62cbbSBarry Smith 4321eb62cbbSBarry Smith /* wait on sends */ 4331eb62cbbSBarry Smith if (nsends) { 4340452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 43578b31e54SBarry Smith CHKPTRQ(send_status); 4361eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4370452661fSBarry Smith PetscFree(send_status); 4381eb62cbbSBarry Smith } 4390452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4401eb62cbbSBarry Smith 4411eb62cbbSBarry Smith return 0; 4421eb62cbbSBarry Smith } 4431eb62cbbSBarry Smith 444416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4451eb62cbbSBarry Smith { 446416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 447fbd6ef76SBarry Smith int ierr,nt; 448416022c9SBarry Smith 449a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 450fbd6ef76SBarry Smith if (nt != a->n) { 45147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 452fbd6ef76SBarry Smith } 453a2ce50c7SBarry Smith ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 454fbd6ef76SBarry Smith if (nt != a->m) { 45547b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy"); 456fbd6ef76SBarry Smith } 45764119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45838597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 45964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 46038597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4611eb62cbbSBarry Smith return 0; 4621eb62cbbSBarry Smith } 4631eb62cbbSBarry Smith 464416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 465da3a660dSBarry Smith { 466416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 467da3a660dSBarry Smith int ierr; 46864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46938597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 47064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 47138597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 472da3a660dSBarry Smith return 0; 473da3a660dSBarry Smith } 474da3a660dSBarry Smith 475416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 476da3a660dSBarry Smith { 477416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 478da3a660dSBarry Smith int ierr; 479da3a660dSBarry Smith 480da3a660dSBarry Smith /* do nondiagonal part */ 48138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 482da3a660dSBarry Smith /* send it on its way */ 483*537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 484da3a660dSBarry Smith /* do local part */ 48538597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 486da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 487da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 488da3a660dSBarry Smith /* but is not perhaps always true. */ 489*537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 490da3a660dSBarry Smith return 0; 491da3a660dSBarry Smith } 492da3a660dSBarry Smith 493416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 494da3a660dSBarry Smith { 495416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 496da3a660dSBarry Smith int ierr; 497da3a660dSBarry Smith 498da3a660dSBarry Smith /* do nondiagonal part */ 49938597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 500da3a660dSBarry Smith /* send it on its way */ 501*537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 502da3a660dSBarry Smith /* do local part */ 50338597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 504da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 505da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 506da3a660dSBarry Smith /* but is not perhaps always true. */ 507416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 50864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 509da3a660dSBarry Smith return 0; 510da3a660dSBarry Smith } 511da3a660dSBarry Smith 5121eb62cbbSBarry Smith /* 5131eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5141eb62cbbSBarry Smith diagonal block 5151eb62cbbSBarry Smith */ 516416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5171eb62cbbSBarry Smith { 518416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 51944cd7ae7SLois Curfman McInnes if (a->M != a->N) 52044cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 5215baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 5225baf8537SBarry Smith SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 523416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5241eb62cbbSBarry Smith } 5251eb62cbbSBarry Smith 526052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 527052efed2SBarry Smith { 528052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 529052efed2SBarry Smith int ierr; 530052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 531052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 532052efed2SBarry Smith return 0; 533052efed2SBarry Smith } 534052efed2SBarry Smith 53544a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5361eb62cbbSBarry Smith { 5371eb62cbbSBarry Smith Mat mat = (Mat) obj; 53844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5391eb62cbbSBarry Smith int ierr; 540a5a9c739SBarry Smith #if defined(PETSC_LOG) 5416e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 542a5a9c739SBarry Smith #endif 5430452661fSBarry Smith PetscFree(aij->rowners); 54478b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 54578b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5460452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5470452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5481eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 549a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5507a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5510452661fSBarry Smith PetscFree(aij); 552a5a9c739SBarry Smith PLogObjectDestroy(mat); 5530452661fSBarry Smith PetscHeaderDestroy(mat); 5541eb62cbbSBarry Smith return 0; 5551eb62cbbSBarry Smith } 5567857610eSBarry Smith #include "draw.h" 557b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 558ee50ffe9SBarry Smith 559416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5601eb62cbbSBarry Smith { 561416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 562416022c9SBarry Smith int ierr; 563416022c9SBarry Smith 56417699dbbSLois Curfman McInnes if (aij->size == 1) { 565416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 566416022c9SBarry Smith } 567a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 568416022c9SBarry Smith return 0; 569416022c9SBarry Smith } 570416022c9SBarry Smith 571d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 572416022c9SBarry Smith { 57344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 574dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 575a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 576d636dbe3SBarry Smith FILE *fd; 57719bcc07fSBarry Smith ViewerType vtype; 578416022c9SBarry Smith 57919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 58019bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 58190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 58290ace30eSBarry Smith if (format == ASCII_FORMAT_INFO_DETAILED) { 58395e01e2fSLois Curfman McInnes int nz, nzalloc, mem, flg; 584a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 58590ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 586a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 58795e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 58877c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 58995e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 59095e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59195e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 59295e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59308480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 59495e01e2fSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 59508480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 59695e01e2fSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 597a56f8943SBarry Smith fflush(fd); 59877c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 599a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 600416022c9SBarry Smith return 0; 601416022c9SBarry Smith } 60290ace30eSBarry Smith else if (format == ASCII_FORMAT_INFO) { 60308480c60SBarry Smith return 0; 60408480c60SBarry Smith } 605416022c9SBarry Smith } 606416022c9SBarry Smith 60719bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 60819bcc07fSBarry Smith Draw draw; 60919bcc07fSBarry Smith PetscTruth isnull; 61019bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 61119bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 61219bcc07fSBarry Smith } 61319bcc07fSBarry Smith 61419bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 61590ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 61677c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 617d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 61817699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6191eb62cbbSBarry Smith aij->cend); 62078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 62178b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 622d13ab20cSBarry Smith fflush(fd); 62377c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 624d13ab20cSBarry Smith } 625416022c9SBarry Smith else { 626a56f8943SBarry Smith int size = aij->size; 627a56f8943SBarry Smith rank = aij->rank; 62817699dbbSLois Curfman McInnes if (size == 1) { 62978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 63095373324SBarry Smith } 63195373324SBarry Smith else { 63295373324SBarry Smith /* assemble the entire matrix onto first processor. */ 63395373324SBarry Smith Mat A; 634ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6352eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 63695373324SBarry Smith Scalar *a; 6372ee70a88SLois Curfman McInnes 63817699dbbSLois Curfman McInnes if (!rank) { 639b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 640c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64195373324SBarry Smith } 64295373324SBarry Smith else { 643b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 644c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64595373324SBarry Smith } 646464493b3SBarry Smith PLogObjectParent(mat,A); 647416022c9SBarry Smith 64895373324SBarry Smith /* copy over the A part */ 649ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6502ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 65195373324SBarry Smith row = aij->rstart; 652dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 65395373324SBarry Smith for ( i=0; i<m; i++ ) { 654416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 65595373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 65695373324SBarry Smith } 6572ee70a88SLois Curfman McInnes aj = Aloc->j; 658dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 65995373324SBarry Smith 66095373324SBarry Smith /* copy over the B part */ 661ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6622ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66395373324SBarry Smith row = aij->rstart; 6640452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 665dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 66695373324SBarry Smith for ( i=0; i<m; i++ ) { 667416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 66895373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 66995373324SBarry Smith } 6700452661fSBarry Smith PetscFree(ct); 6716d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6726d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 67317699dbbSLois Curfman McInnes if (!rank) { 67478b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 67595373324SBarry Smith } 67678b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 67795373324SBarry Smith } 67895373324SBarry Smith } 6791eb62cbbSBarry Smith return 0; 6801eb62cbbSBarry Smith } 6811eb62cbbSBarry Smith 682416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 683416022c9SBarry Smith { 684416022c9SBarry Smith Mat mat = (Mat) obj; 685416022c9SBarry Smith int ierr; 68619bcc07fSBarry Smith ViewerType vtype; 687416022c9SBarry Smith 68819bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 68919bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 69019bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 691d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 692416022c9SBarry Smith } 69319bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 694416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 695416022c9SBarry Smith } 696416022c9SBarry Smith return 0; 697416022c9SBarry Smith } 698416022c9SBarry Smith 6991eb62cbbSBarry Smith /* 7001eb62cbbSBarry Smith This has to provide several versions. 7011eb62cbbSBarry Smith 7021eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7031eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 704d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7051eb62cbbSBarry Smith */ 706fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 707dbb450caSBarry Smith double fshift,int its,Vec xx) 7088a729477SBarry Smith { 70944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 710d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 711ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 712c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7136abc6512SBarry Smith int ierr,*idx, *diag; 714416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7158a729477SBarry Smith 716d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 717dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 718dbb450caSBarry Smith ls = ls + shift; 719ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 720d6dfbf8fSBarry Smith diag = A->diag; 721c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 722da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 72338597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 724da3a660dSBarry Smith } 72564119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72678b31e54SBarry Smith CHKERRQ(ierr); 72764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72878b31e54SBarry Smith CHKERRQ(ierr); 729d6dfbf8fSBarry Smith while (its--) { 730d6dfbf8fSBarry Smith /* go down through the rows */ 731d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 732d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 733dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 734dbb450caSBarry Smith v = A->a + A->i[i] + shift; 735d6dfbf8fSBarry Smith sum = b[i]; 736d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 737dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 738d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 739dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 740dbb450caSBarry Smith v = B->a + B->i[i] + shift; 741d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 742d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 743d6dfbf8fSBarry Smith } 744d6dfbf8fSBarry Smith /* come up through the rows */ 745d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 746d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 747dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 748dbb450caSBarry Smith v = A->a + A->i[i] + shift; 749d6dfbf8fSBarry Smith sum = b[i]; 750d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 751dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 752d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 753dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 754dbb450caSBarry Smith v = B->a + B->i[i] + shift; 755d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 756d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 757d6dfbf8fSBarry Smith } 758d6dfbf8fSBarry Smith } 759d6dfbf8fSBarry Smith } 760d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 761da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 76238597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 763da3a660dSBarry Smith } 76464119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76578b31e54SBarry Smith CHKERRQ(ierr); 76664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76778b31e54SBarry Smith CHKERRQ(ierr); 768d6dfbf8fSBarry Smith while (its--) { 769d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 770d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 771dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 772dbb450caSBarry Smith v = A->a + A->i[i] + shift; 773d6dfbf8fSBarry Smith sum = b[i]; 774d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 775dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 776d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 777dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 778dbb450caSBarry Smith v = B->a + B->i[i] + shift; 779d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 780d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 781d6dfbf8fSBarry Smith } 782d6dfbf8fSBarry Smith } 783d6dfbf8fSBarry Smith } 784d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 785da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 78638597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 787da3a660dSBarry Smith } 78864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 78978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 79064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 79178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 792d6dfbf8fSBarry Smith while (its--) { 793d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 794d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 795dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 796dbb450caSBarry Smith v = A->a + A->i[i] + shift; 797d6dfbf8fSBarry Smith sum = b[i]; 798d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 799dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 800d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 801dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 802dbb450caSBarry Smith v = B->a + B->i[i] + shift; 803d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 804d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 805d6dfbf8fSBarry Smith } 806d6dfbf8fSBarry Smith } 807d6dfbf8fSBarry Smith } 808c16cb8f2SBarry Smith else { 809c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 810c16cb8f2SBarry Smith } 8118a729477SBarry Smith return 0; 8128a729477SBarry Smith } 813a66be287SLois Curfman McInnes 814fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 815fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 816a66be287SLois Curfman McInnes { 817a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 818a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 819a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 820a66be287SLois Curfman McInnes 82178b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 82278b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 823a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 824a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 825bcd2baecSBarry Smith if (nz) *nz = isend[0]; 826bcd2baecSBarry Smith if (nzalloc) *nzalloc = isend[1]; 827bcd2baecSBarry Smith if (mem) *mem = isend[2]; 828a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 829d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 830bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 831bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 832bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 833a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 834d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 835bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 836bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 837bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 838a66be287SLois Curfman McInnes } 839a66be287SLois Curfman McInnes return 0; 840a66be287SLois Curfman McInnes } 841a66be287SLois Curfman McInnes 842299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 843299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 844299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 845299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 846299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 847299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 848299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 849299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 850299609e3SLois Curfman McInnes 851416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 852c74985f6SBarry Smith { 853c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 854c74985f6SBarry Smith 8556d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 8566d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 8576d4a8577SBarry Smith op == MAT_COLUMNS_SORTED || 8586d4a8577SBarry Smith op == MAT_ROW_ORIENTED) { 859c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 860c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 861c74985f6SBarry Smith } 8626d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 8636d4a8577SBarry Smith op == MAT_SYMMETRIC || 8646d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 8656d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 86694a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 8676d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 8684b0e389bSBarry Smith a->roworiented = 0; 8694b0e389bSBarry Smith MatSetOption(a->A,op); 8704b0e389bSBarry Smith MatSetOption(a->B,op); 8714b0e389bSBarry Smith } 8726d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 8736d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 874c0bbcb79SLois Curfman McInnes else 8754d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 876c74985f6SBarry Smith return 0; 877c74985f6SBarry Smith } 878c74985f6SBarry Smith 879d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 880c74985f6SBarry Smith { 88144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 882c74985f6SBarry Smith *m = mat->M; *n = mat->N; 883c74985f6SBarry Smith return 0; 884c74985f6SBarry Smith } 885c74985f6SBarry Smith 886d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 887c74985f6SBarry Smith { 88844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 889b7c46309SBarry Smith *m = mat->m; *n = mat->N; 890c74985f6SBarry Smith return 0; 891c74985f6SBarry Smith } 892c74985f6SBarry Smith 893d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 894c74985f6SBarry Smith { 89544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 896c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 897c74985f6SBarry Smith return 0; 898c74985f6SBarry Smith } 899c74985f6SBarry Smith 9006d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9016d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9026d84be18SBarry Smith 9036d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 90439e00950SLois Curfman McInnes { 905154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 90670f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 907154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 908154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 90970f0671dSBarry Smith int *cmap, *idx_p; 91039e00950SLois Curfman McInnes 9117a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 9127a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 9137a0afa10SBarry Smith 91470f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 9157a0afa10SBarry Smith /* 9167a0afa10SBarry Smith allocate enough space to hold information from the longest row. 9177a0afa10SBarry Smith */ 9187a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 919c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 920c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 9217a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 9227a0afa10SBarry Smith if (max < tmp) { max = tmp; } 9237a0afa10SBarry Smith } 9247a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 9257a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 9267a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 9277a0afa10SBarry Smith } 9287a0afa10SBarry Smith 929b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 930abc0e9e4SLois Curfman McInnes lrow = row - rstart; 93139e00950SLois Curfman McInnes 932154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 933154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 934154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 93538597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 93638597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 937154123eaSLois Curfman McInnes nztot = nzA + nzB; 938154123eaSLois Curfman McInnes 93970f0671dSBarry Smith cmap = mat->garray; 940154123eaSLois Curfman McInnes if (v || idx) { 941154123eaSLois Curfman McInnes if (nztot) { 942154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 94370f0671dSBarry Smith int imark = -1; 944154123eaSLois Curfman McInnes if (v) { 94570f0671dSBarry Smith *v = v_p = mat->rowvalues; 94639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 94770f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 948154123eaSLois Curfman McInnes else break; 949154123eaSLois Curfman McInnes } 950154123eaSLois Curfman McInnes imark = i; 95170f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 95270f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 953154123eaSLois Curfman McInnes } 954154123eaSLois Curfman McInnes if (idx) { 95570f0671dSBarry Smith *idx = idx_p = mat->rowindices; 95670f0671dSBarry Smith if (imark > -1) { 95770f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 95870f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 95970f0671dSBarry Smith } 96070f0671dSBarry Smith } else { 961154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 96270f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 963154123eaSLois Curfman McInnes else break; 964154123eaSLois Curfman McInnes } 965154123eaSLois Curfman McInnes imark = i; 96670f0671dSBarry Smith } 96770f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 96870f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 96939e00950SLois Curfman McInnes } 97039e00950SLois Curfman McInnes } 9711ca473b0SSatish Balay else { 9721ca473b0SSatish Balay if (idx) *idx = 0; 9731ca473b0SSatish Balay if (v) *v = 0; 9741ca473b0SSatish Balay } 975154123eaSLois Curfman McInnes } 97639e00950SLois Curfman McInnes *nz = nztot; 97738597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 97838597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 97939e00950SLois Curfman McInnes return 0; 98039e00950SLois Curfman McInnes } 98139e00950SLois Curfman McInnes 9826d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 98339e00950SLois Curfman McInnes { 9847a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 9857a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 9867a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 9877a0afa10SBarry Smith } 9887a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 98939e00950SLois Curfman McInnes return 0; 99039e00950SLois Curfman McInnes } 99139e00950SLois Curfman McInnes 992cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 993855ac2c5SLois Curfman McInnes { 994855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 995ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 996416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 997416022c9SBarry Smith double sum = 0.0; 99804ca555eSLois Curfman McInnes Scalar *v; 99904ca555eSLois Curfman McInnes 100017699dbbSLois Curfman McInnes if (aij->size == 1) { 100114183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 100237fa93a5SLois Curfman McInnes } else { 100304ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 100404ca555eSLois Curfman McInnes v = amat->a; 100504ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 100604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 100704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 100804ca555eSLois Curfman McInnes #else 100904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 101004ca555eSLois Curfman McInnes #endif 101104ca555eSLois Curfman McInnes } 101204ca555eSLois Curfman McInnes v = bmat->a; 101304ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 101404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 101504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 101604ca555eSLois Curfman McInnes #else 101704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 101804ca555eSLois Curfman McInnes #endif 101904ca555eSLois Curfman McInnes } 10206d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 102104ca555eSLois Curfman McInnes *norm = sqrt(*norm); 102204ca555eSLois Curfman McInnes } 102304ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 102404ca555eSLois Curfman McInnes double *tmp, *tmp2; 102504ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 10260452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 10270452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1028cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 102904ca555eSLois Curfman McInnes *norm = 0.0; 103004ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 103104ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1032579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 103304ca555eSLois Curfman McInnes } 103404ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 103504ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1036579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 103704ca555eSLois Curfman McInnes } 10386d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 103904ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 104004ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 104104ca555eSLois Curfman McInnes } 10420452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 104304ca555eSLois Curfman McInnes } 104404ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1045515d9167SLois Curfman McInnes double ntemp = 0.0; 104604ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1047dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 104804ca555eSLois Curfman McInnes sum = 0.0; 104904ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1050cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 105104ca555eSLois Curfman McInnes } 1052dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 105304ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1054cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 105504ca555eSLois Curfman McInnes } 1056515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 105704ca555eSLois Curfman McInnes } 10586d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 105904ca555eSLois Curfman McInnes } 106004ca555eSLois Curfman McInnes else { 1061515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 106204ca555eSLois Curfman McInnes } 106337fa93a5SLois Curfman McInnes } 1064855ac2c5SLois Curfman McInnes return 0; 1065855ac2c5SLois Curfman McInnes } 1066855ac2c5SLois Curfman McInnes 10670de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1068b7c46309SBarry Smith { 1069b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1070dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1071416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1072416022c9SBarry Smith Mat B; 1073b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1074b7c46309SBarry Smith Scalar *array; 1075b7c46309SBarry Smith 10763638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 10773638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1078b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1079b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1080b7c46309SBarry Smith 1081b7c46309SBarry Smith /* copy over the A part */ 1082ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1083b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1084b7c46309SBarry Smith row = a->rstart; 1085dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1086b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1087416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1088b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1089b7c46309SBarry Smith } 1090b7c46309SBarry Smith aj = Aloc->j; 10914af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1092b7c46309SBarry Smith 1093b7c46309SBarry Smith /* copy over the B part */ 1094ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1095b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1096b7c46309SBarry Smith row = a->rstart; 10970452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1098dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1099b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1100416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1101b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1102b7c46309SBarry Smith } 11030452661fSBarry Smith PetscFree(ct); 11046d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11056d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11063638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 11070de55854SLois Curfman McInnes *matout = B; 11080de55854SLois Curfman McInnes } else { 11090de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 11100452661fSBarry Smith PetscFree(a->rowners); 11110de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 11120de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 11130452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 11140452661fSBarry Smith if (a->garray) PetscFree(a->garray); 11150de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1116a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 11170452661fSBarry Smith PetscFree(a); 1118416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 11190452661fSBarry Smith PetscHeaderDestroy(B); 11200de55854SLois Curfman McInnes } 1121b7c46309SBarry Smith return 0; 1122b7c46309SBarry Smith } 1123b7c46309SBarry Smith 1124682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1125682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1126682d7d0cSBarry Smith { 1127682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1128682d7d0cSBarry Smith 1129682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1130682d7d0cSBarry Smith else return 0; 1131682d7d0cSBarry Smith } 1132682d7d0cSBarry Smith 11335a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 11345a838052SSatish Balay { 11355a838052SSatish Balay *bs = 1; 11365a838052SSatish Balay return 0; 11375a838052SSatish Balay } 11385a838052SSatish Balay 1139fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 11403d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 11412f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1142598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 11438a729477SBarry Smith /* -------------------------------------------------------------------*/ 11442ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 114539e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 114644a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 114744a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1148299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1149299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1150299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 115144a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1152b7c46309SBarry Smith MatTranspose_MPIAIJ, 1153a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1154855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1155ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11561eb62cbbSBarry Smith 0, 1157299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1158299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1159299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1160d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1161299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 11623e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1163b49de8d1SLois Curfman McInnes 0,0,0, 1164598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1165052efed2SBarry Smith MatPrintHelp_MPIAIJ, 11665a838052SSatish Balay MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ}; 11678a729477SBarry Smith 11681987afe7SBarry Smith /*@C 1169ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 11703a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 11713a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 11723a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 11733a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 11748a729477SBarry Smith 11758a729477SBarry Smith Input Parameters: 11761eb62cbbSBarry Smith . comm - MPI communicator 11777d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 117892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 117992e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 118092e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 118192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118292e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 11837d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 118492e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1185ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1186ff756334SLois Curfman McInnes (same for all local rows) 11872bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 118892e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 11892bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 11902bd5e0b2SLois Curfman McInnes it is zero. 11912bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1192ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 11932bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 11942bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 11952bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 11968a729477SBarry Smith 1197ff756334SLois Curfman McInnes Output Parameter: 119844cd7ae7SLois Curfman McInnes . A - the matrix 11998a729477SBarry Smith 1200ff756334SLois Curfman McInnes Notes: 1201ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1202ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12030002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12040002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1205ff756334SLois Curfman McInnes 1206ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1207ff756334SLois Curfman McInnes (possibly both). 1208ff756334SLois Curfman McInnes 12095511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 12105511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 12115511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 12125511cfe3SLois Curfman McInnes 12135511cfe3SLois Curfman McInnes Options Database Keys: 12145511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 12156e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 12166e62573dSLois Curfman McInnes $ (max limit=5) 12176e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 12186e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 12196e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 12205511cfe3SLois Curfman McInnes 1221e0245417SLois Curfman McInnes Storage Information: 1222e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1223e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1224e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1225e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1226e0245417SLois Curfman McInnes 1227e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 12285ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 12295ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 12305ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 12315ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1232ff756334SLois Curfman McInnes 12335511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 12345511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 12352191d07cSBarry Smith 1236b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1237b810aeb4SBarry Smith $ ------------------- 12385511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 12395511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 12405511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 12415511cfe3SLois Curfman McInnes $ ------------------- 1242b810aeb4SBarry Smith $ 1243b810aeb4SBarry Smith 12445511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 12455511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 12465511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 12475511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 12485511cfe3SLois Curfman McInnes 12495511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 12505511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 12515511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 12523d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 125392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 12543d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 12553a511b96SLois Curfman McInnes 1256dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1257ff756334SLois Curfman McInnes 1258fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 12598a729477SBarry Smith @*/ 12601eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 126144cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 12628a729477SBarry Smith { 126344cd7ae7SLois Curfman McInnes Mat B; 126444cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 12656abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1266416022c9SBarry Smith 126744cd7ae7SLois Curfman McInnes *A = 0; 126844cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 126944cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 127044cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 127144cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 127244cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 127344cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 127444cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 127544cd7ae7SLois Curfman McInnes B->factor = 0; 127644cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 1277d6dfbf8fSBarry Smith 127844cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 127944cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 128044cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 12811eb62cbbSBarry Smith 1282b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 12832e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 12841987afe7SBarry Smith 1285dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 12861eb62cbbSBarry Smith work[0] = m; work[1] = n; 1287d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1288dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1289dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 12901eb62cbbSBarry Smith } 129144cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 129244cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 129344cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 129444cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 129544cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 129644cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 12971eb62cbbSBarry Smith 12981eb62cbbSBarry Smith /* build local table of row and column ownerships */ 129944cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 130044cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1301603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 130244cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 130344cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 130444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 130544cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 13068a729477SBarry Smith } 130744cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 130844cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 130944cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 131044cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 131144cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 131244cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 13138a729477SBarry Smith } 131444cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 131544cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 13168a729477SBarry Smith 13175ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 131844cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 131944cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 13207b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 132144cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 132244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 13238a729477SBarry Smith 13241eb62cbbSBarry Smith /* build cache for off array entries formed */ 132544cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 132644cd7ae7SLois Curfman McInnes b->colmap = 0; 132744cd7ae7SLois Curfman McInnes b->garray = 0; 132844cd7ae7SLois Curfman McInnes b->roworiented = 1; 13298a729477SBarry Smith 13301eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 133144cd7ae7SLois Curfman McInnes b->lvec = 0; 133244cd7ae7SLois Curfman McInnes b->Mvctx = 0; 13338a729477SBarry Smith 13347a0afa10SBarry Smith /* stuff for MatGetRow() */ 133544cd7ae7SLois Curfman McInnes b->rowindices = 0; 133644cd7ae7SLois Curfman McInnes b->rowvalues = 0; 133744cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 13387a0afa10SBarry Smith 133944cd7ae7SLois Curfman McInnes *A = B; 1340d6dfbf8fSBarry Smith return 0; 1341d6dfbf8fSBarry Smith } 1342c74985f6SBarry Smith 13433d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1344d6dfbf8fSBarry Smith { 1345d6dfbf8fSBarry Smith Mat mat; 1346416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1347a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1348d6dfbf8fSBarry Smith 1349416022c9SBarry Smith *newmat = 0; 13500452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1351a5a9c739SBarry Smith PLogObjectCreate(mat); 13520452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1353416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 135444a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 135544a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1356d6dfbf8fSBarry Smith mat->factor = matin->factor; 1357c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1358d6dfbf8fSBarry Smith 135944cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 136044cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 136144cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 136244cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1363d6dfbf8fSBarry Smith 1364416022c9SBarry Smith a->rstart = oldmat->rstart; 1365416022c9SBarry Smith a->rend = oldmat->rend; 1366416022c9SBarry Smith a->cstart = oldmat->cstart; 1367416022c9SBarry Smith a->cend = oldmat->cend; 136817699dbbSLois Curfman McInnes a->size = oldmat->size; 136917699dbbSLois Curfman McInnes a->rank = oldmat->rank; 137064119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1371bcd2baecSBarry Smith a->rowvalues = 0; 1372bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1373d6dfbf8fSBarry Smith 1374603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1375603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1376603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1377603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1378416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 13792ee70a88SLois Curfman McInnes if (oldmat->colmap) { 13800452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1381416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1382416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1383416022c9SBarry Smith } else a->colmap = 0; 13843f41c07dSBarry Smith if (oldmat->garray) { 13853f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 13863f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1387464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 13883f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1389416022c9SBarry Smith } else a->garray = 0; 1390d6dfbf8fSBarry Smith 1391416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1392416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1393a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1394416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1395416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1396416022c9SBarry Smith PLogObjectParent(mat,a->A); 1397416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1398416022c9SBarry Smith PLogObjectParent(mat,a->B); 13995dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 140025cdf11fSBarry Smith if (flg) { 1401682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1402682d7d0cSBarry Smith } 14038a729477SBarry Smith *newmat = mat; 14048a729477SBarry Smith return 0; 14058a729477SBarry Smith } 1406416022c9SBarry Smith 140777c4ece6SBarry Smith #include "sys.h" 1408416022c9SBarry Smith 140919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1410416022c9SBarry Smith { 1411d65a2f8fSBarry Smith Mat A; 1412416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1413d65a2f8fSBarry Smith Scalar *vals,*svals; 141419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1415416022c9SBarry Smith MPI_Status status; 141617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1417d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 141819bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1419416022c9SBarry Smith 142017699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 142117699dbbSLois Curfman McInnes if (!rank) { 142290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 142377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1424c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1425416022c9SBarry Smith } 1426416022c9SBarry Smith 1427416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1428416022c9SBarry Smith M = header[1]; N = header[2]; 1429416022c9SBarry Smith /* determine ownership of all rows */ 143017699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 14310452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1432416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1433416022c9SBarry Smith rowners[0] = 0; 143417699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1435416022c9SBarry Smith rowners[i] += rowners[i-1]; 1436416022c9SBarry Smith } 143717699dbbSLois Curfman McInnes rstart = rowners[rank]; 143817699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1439416022c9SBarry Smith 1440416022c9SBarry Smith /* distribute row lengths to all processors */ 14410452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1442416022c9SBarry Smith offlens = ourlens + (rend-rstart); 144317699dbbSLois Curfman McInnes if (!rank) { 14440452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 144577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 14460452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 144717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1448d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 14490452661fSBarry Smith PetscFree(sndcounts); 1450416022c9SBarry Smith } 1451416022c9SBarry Smith else { 1452416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1453416022c9SBarry Smith } 1454416022c9SBarry Smith 145517699dbbSLois Curfman McInnes if (!rank) { 1456416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 14570452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1458cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 145917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1460416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1461416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1462416022c9SBarry Smith } 1463416022c9SBarry Smith } 14640452661fSBarry Smith PetscFree(rowlengths); 1465416022c9SBarry Smith 1466416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1467416022c9SBarry Smith maxnz = 0; 146817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 14690452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1470416022c9SBarry Smith } 14710452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1472416022c9SBarry Smith 1473416022c9SBarry Smith /* read in my part of the matrix column indices */ 1474416022c9SBarry Smith nz = procsnz[0]; 14750452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 147677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1477d65a2f8fSBarry Smith 1478d65a2f8fSBarry Smith /* read in every one elses and ship off */ 147917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1480d65a2f8fSBarry Smith nz = procsnz[i]; 148177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1482d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1483d65a2f8fSBarry Smith } 14840452661fSBarry Smith PetscFree(cols); 1485416022c9SBarry Smith } 1486416022c9SBarry Smith else { 1487416022c9SBarry Smith /* determine buffer space needed for message */ 1488416022c9SBarry Smith nz = 0; 1489416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1490416022c9SBarry Smith nz += ourlens[i]; 1491416022c9SBarry Smith } 14920452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1493416022c9SBarry Smith 1494416022c9SBarry Smith /* receive message of column indices*/ 1495d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1496416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1497c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1498416022c9SBarry Smith } 1499416022c9SBarry Smith 1500416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1501cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1502416022c9SBarry Smith jj = 0; 1503416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1504416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1505d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1506416022c9SBarry Smith jj++; 1507416022c9SBarry Smith } 1508416022c9SBarry Smith } 1509d65a2f8fSBarry Smith 1510d65a2f8fSBarry Smith /* create our matrix */ 1511416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1512416022c9SBarry Smith ourlens[i] -= offlens[i]; 1513416022c9SBarry Smith } 1514d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1515d65a2f8fSBarry Smith A = *newmat; 15166d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1517d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1518d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1519d65a2f8fSBarry Smith } 1520416022c9SBarry Smith 152117699dbbSLois Curfman McInnes if (!rank) { 15220452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1523416022c9SBarry Smith 1524416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1525416022c9SBarry Smith nz = procsnz[0]; 152677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1527d65a2f8fSBarry Smith 1528d65a2f8fSBarry Smith /* insert into matrix */ 1529d65a2f8fSBarry Smith jj = rstart; 1530d65a2f8fSBarry Smith smycols = mycols; 1531d65a2f8fSBarry Smith svals = vals; 1532d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1533d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1534d65a2f8fSBarry Smith smycols += ourlens[i]; 1535d65a2f8fSBarry Smith svals += ourlens[i]; 1536d65a2f8fSBarry Smith jj++; 1537416022c9SBarry Smith } 1538416022c9SBarry Smith 1539d65a2f8fSBarry Smith /* read in other processors and ship out */ 154017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1541416022c9SBarry Smith nz = procsnz[i]; 154277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1543d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1544416022c9SBarry Smith } 15450452661fSBarry Smith PetscFree(procsnz); 1546416022c9SBarry Smith } 1547d65a2f8fSBarry Smith else { 1548d65a2f8fSBarry Smith /* receive numeric values */ 15490452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1550416022c9SBarry Smith 1551d65a2f8fSBarry Smith /* receive message of values*/ 1552d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1553d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1554c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1555d65a2f8fSBarry Smith 1556d65a2f8fSBarry Smith /* insert into matrix */ 1557d65a2f8fSBarry Smith jj = rstart; 1558d65a2f8fSBarry Smith smycols = mycols; 1559d65a2f8fSBarry Smith svals = vals; 1560d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1561d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1562d65a2f8fSBarry Smith smycols += ourlens[i]; 1563d65a2f8fSBarry Smith svals += ourlens[i]; 1564d65a2f8fSBarry Smith jj++; 1565d65a2f8fSBarry Smith } 1566d65a2f8fSBarry Smith } 15670452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1568d65a2f8fSBarry Smith 15696d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15706d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1571416022c9SBarry Smith return 0; 1572416022c9SBarry Smith } 1573