1905e6a2fSBarry Smith 2cb512458SBarry Smith #ifndef lint 3*3f41c07dSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.161 1996/08/12 03:41:34 bsmith Exp bsmith $"; 4cb512458SBarry Smith #endif 58a729477SBarry Smith 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 15905e6a2fSBarry Smith static int CreateColmap_MPIAIJ_Private(Mat mat) 169e25ed09SBarry Smith { 1744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 18ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 19905e6a2fSBarry Smith int n = B->n,i; 20dbb450caSBarry Smith 210452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 22464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 23cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 24905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 259e25ed09SBarry Smith return 0; 269e25ed09SBarry Smith } 279e25ed09SBarry Smith 282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 292493cbb0SBarry Smith 3070423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 3417699dbbSLois Curfman McInnes if (aij->size == 1) { 3551c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3648d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 37299609e3SLois Curfman McInnes return 0; 38299609e3SLois Curfman McInnes } 39299609e3SLois Curfman McInnes 404b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46905e6a2fSBarry Smith int roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67905e6a2fSBarry Smith if (!aij->colmap) { 68905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 69905e6a2fSBarry Smith } 70905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 71ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 722493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 734b0e389bSBarry Smith col = in[j]; 74d6dfbf8fSBarry Smith } 759e25ed09SBarry Smith } 764b0e389bSBarry Smith else col = in[j]; 774b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 784b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith } 811eb62cbbSBarry Smith } 821eb62cbbSBarry Smith else { 834b0e389bSBarry Smith if (roworiented) { 844b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 854b0e389bSBarry Smith } 864b0e389bSBarry Smith else { 874b0e389bSBarry Smith row = im[i]; 884b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 894b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 904b0e389bSBarry Smith } 914b0e389bSBarry Smith } 921eb62cbbSBarry Smith } 938a729477SBarry Smith } 948a729477SBarry Smith return 0; 958a729477SBarry Smith } 968a729477SBarry Smith 97b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 98b49de8d1SLois Curfman McInnes { 99b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 100b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 101b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116905e6a2fSBarry Smith if (!aij->colmap) { 117905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 118905e6a2fSBarry Smith } 119905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 120e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 121d9d09a02SSatish Balay else { 122b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes } 126d9d09a02SSatish Balay } 127b49de8d1SLois Curfman McInnes else { 128b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 129b49de8d1SLois Curfman McInnes } 130b49de8d1SLois Curfman McInnes } 131b49de8d1SLois Curfman McInnes return 0; 132b49de8d1SLois Curfman McInnes } 133b49de8d1SLois Curfman McInnes 134fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1358a729477SBarry Smith { 13644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 137d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13817699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13917699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1401eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1416abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1421eb62cbbSBarry Smith InsertMode addv; 1431eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1441eb62cbbSBarry Smith 1451eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 146d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 147dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 148bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1511eb62cbbSBarry Smith 1521eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1530452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 154cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1550452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1561eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1571eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1591eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1601eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1618a729477SBarry Smith } 1628a729477SBarry Smith } 1638a729477SBarry Smith } 16417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1651eb62cbbSBarry Smith 1661eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1670452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16817699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16917699dbbSLois Curfman McInnes nreceives = work[rank]; 17017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 17117699dbbSLois Curfman McInnes nmax = work[rank]; 1720452661fSBarry Smith PetscFree(work); 1731eb62cbbSBarry Smith 1741eb62cbbSBarry Smith /* post receives: 1751eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1761eb62cbbSBarry Smith (global index,value) we store the global index as a double 177d6dfbf8fSBarry Smith to simplify the message passing. 1781eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1791eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1801eb62cbbSBarry Smith this is a lot of wasted space. 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith 1831eb62cbbSBarry Smith This could be done better. 1841eb62cbbSBarry Smith */ 1850452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18678b31e54SBarry Smith CHKPTRQ(rvalues); 1870452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18878b31e54SBarry Smith CHKPTRQ(recv_waits); 1891eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 190416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1911eb62cbbSBarry Smith comm,recv_waits+i); 1921eb62cbbSBarry Smith } 1931eb62cbbSBarry Smith 1941eb62cbbSBarry Smith /* do sends: 1951eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1961eb62cbbSBarry Smith the ith processor 1971eb62cbbSBarry Smith */ 1980452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1990452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 20078b31e54SBarry Smith CHKPTRQ(send_waits); 2010452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2021eb62cbbSBarry Smith starts[0] = 0; 20317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2041eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2051eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2061eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2071eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2081eb62cbbSBarry Smith } 2090452661fSBarry Smith PetscFree(owner); 2101eb62cbbSBarry Smith starts[0] = 0; 21117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2121eb62cbbSBarry Smith count = 0; 21317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2141eb62cbbSBarry Smith if (procs[i]) { 215416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2161eb62cbbSBarry Smith comm,send_waits+count++); 2171eb62cbbSBarry Smith } 2181eb62cbbSBarry Smith } 2190452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2201eb62cbbSBarry Smith 2211eb62cbbSBarry Smith /* Free cache space */ 2222191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 22378b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2241eb62cbbSBarry Smith 2251eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2261eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2271eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2281eb62cbbSBarry Smith aij->rmax = nmax; 2291eb62cbbSBarry Smith 2301eb62cbbSBarry Smith return 0; 2311eb62cbbSBarry Smith } 23244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2331eb62cbbSBarry Smith 234fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2351eb62cbbSBarry Smith { 23644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2371eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 238416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 239905e6a2fSBarry Smith int row,col,other_disassembled; 2401eb62cbbSBarry Smith Scalar *values,val; 2411eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2421eb62cbbSBarry Smith 2431eb62cbbSBarry Smith /* wait on receives */ 2441eb62cbbSBarry Smith while (count) { 245d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2461eb62cbbSBarry Smith /* unpack receives into our local space */ 247d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 248c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2491eb62cbbSBarry Smith n = n/3; 2501eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 251227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 252227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2531eb62cbbSBarry Smith val = values[3*i+2]; 2541eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2551eb62cbbSBarry Smith col -= aij->cstart; 2561eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2571eb62cbbSBarry Smith } 2581eb62cbbSBarry Smith else { 259227d817aSBarry Smith if (mat->was_assembled) { 260905e6a2fSBarry Smith if (!aij->colmap) { 261905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262905e6a2fSBarry Smith } 263905e6a2fSBarry Smith col = aij->colmap[col] - 1; 264ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2652493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 266227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 267d6dfbf8fSBarry Smith } 2689e25ed09SBarry Smith } 2691eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith } 2721eb62cbbSBarry Smith count--; 2731eb62cbbSBarry Smith } 2740452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* wait on sends */ 2771eb62cbbSBarry Smith if (aij->nsends) { 2780452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27978b31e54SBarry Smith CHKPTRQ(send_status); 2801eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2810452661fSBarry Smith PetscFree(send_status); 2821eb62cbbSBarry Smith } 2830452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2841eb62cbbSBarry Smith 28564119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 28678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2881eb62cbbSBarry Smith 2892493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2902493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 291227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 292227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2932493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2942493cbb0SBarry Smith } 2952493cbb0SBarry Smith 2966d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 29778b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2985e42470aSBarry Smith } 29978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 30078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3015e42470aSBarry Smith 3027a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3038a729477SBarry Smith return 0; 3048a729477SBarry Smith } 3058a729477SBarry Smith 3062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3071eb62cbbSBarry Smith { 30844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 309dbd7a890SLois Curfman McInnes int ierr; 31078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 31178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3121eb62cbbSBarry Smith return 0; 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith 3151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3161eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3171eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3181eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3191eb62cbbSBarry Smith routine. 3201eb62cbbSBarry Smith */ 32144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3221eb62cbbSBarry Smith { 32344a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 32417699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3256abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 32617699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3275392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 328d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 329d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3301eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3311eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3321eb62cbbSBarry Smith IS istmp; 3331eb62cbbSBarry Smith 33477c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 33578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3380452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 339cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3400452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3411eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3421eb62cbbSBarry Smith idx = rows[i]; 3431eb62cbbSBarry Smith found = 0; 34417699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3451eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3461eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3471eb62cbbSBarry Smith } 3481eb62cbbSBarry Smith } 349bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3501eb62cbbSBarry Smith } 35117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3540452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 35517699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 35617699dbbSLois Curfman McInnes nrecvs = work[rank]; 35717699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35817699dbbSLois Curfman McInnes nmax = work[rank]; 3590452661fSBarry Smith PetscFree(work); 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith /* post receives: */ 3620452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 36378b31e54SBarry Smith CHKPTRQ(rvalues); 3640452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 36578b31e54SBarry Smith CHKPTRQ(recv_waits); 3661eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 367416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3681eb62cbbSBarry Smith } 3691eb62cbbSBarry Smith 3701eb62cbbSBarry Smith /* do sends: 3711eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3721eb62cbbSBarry Smith the ith processor 3731eb62cbbSBarry Smith */ 3740452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3750452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 37678b31e54SBarry Smith CHKPTRQ(send_waits); 3770452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3781eb62cbbSBarry Smith starts[0] = 0; 37917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3801eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3811eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3841eb62cbbSBarry Smith 3851eb62cbbSBarry Smith starts[0] = 0; 38617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3871eb62cbbSBarry Smith count = 0; 38817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3891eb62cbbSBarry Smith if (procs[i]) { 390416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3911eb62cbbSBarry Smith } 3921eb62cbbSBarry Smith } 3930452661fSBarry Smith PetscFree(starts); 3941eb62cbbSBarry Smith 39517699dbbSLois Curfman McInnes base = owners[rank]; 3961eb62cbbSBarry Smith 3971eb62cbbSBarry Smith /* wait on receives */ 3980452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3991eb62cbbSBarry Smith source = lens + nrecvs; 4001eb62cbbSBarry Smith count = nrecvs; slen = 0; 4011eb62cbbSBarry Smith while (count) { 402d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4031eb62cbbSBarry Smith /* unpack receives into our local space */ 4041eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 405d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 406d6dfbf8fSBarry Smith lens[imdex] = n; 4071eb62cbbSBarry Smith slen += n; 4081eb62cbbSBarry Smith count--; 4091eb62cbbSBarry Smith } 4100452661fSBarry Smith PetscFree(recv_waits); 4111eb62cbbSBarry Smith 4121eb62cbbSBarry Smith /* move the data into the send scatter */ 4130452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4141eb62cbbSBarry Smith count = 0; 4151eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4161eb62cbbSBarry Smith values = rvalues + i*nmax; 4171eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4181eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4191eb62cbbSBarry Smith } 4201eb62cbbSBarry Smith } 4210452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4220452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* actually zap the local rows */ 425416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 426464493b3SBarry Smith PLogObjectParent(A,istmp); 4270452661fSBarry Smith PetscFree(lrows); 42878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4311eb62cbbSBarry Smith 4321eb62cbbSBarry Smith /* wait on sends */ 4331eb62cbbSBarry Smith if (nsends) { 4340452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 43578b31e54SBarry Smith CHKPTRQ(send_status); 4361eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4370452661fSBarry Smith PetscFree(send_status); 4381eb62cbbSBarry Smith } 4390452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4401eb62cbbSBarry Smith 4411eb62cbbSBarry Smith return 0; 4421eb62cbbSBarry Smith } 4431eb62cbbSBarry Smith 444416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4451eb62cbbSBarry Smith { 446416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 447fbd6ef76SBarry Smith int ierr,nt; 448416022c9SBarry Smith 449a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 450fbd6ef76SBarry Smith if (nt != a->n) { 45147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 452fbd6ef76SBarry Smith } 453a2ce50c7SBarry Smith ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 454fbd6ef76SBarry Smith if (nt != a->m) { 45547b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy"); 456fbd6ef76SBarry Smith } 45764119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45838597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 45964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 46038597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4611eb62cbbSBarry Smith return 0; 4621eb62cbbSBarry Smith } 4631eb62cbbSBarry Smith 464416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 465da3a660dSBarry Smith { 466416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 467da3a660dSBarry Smith int ierr; 46864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46938597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 47064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 47138597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 472da3a660dSBarry Smith return 0; 473da3a660dSBarry Smith } 474da3a660dSBarry Smith 475416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 476da3a660dSBarry Smith { 477416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 478da3a660dSBarry Smith int ierr; 479da3a660dSBarry Smith 480da3a660dSBarry Smith /* do nondiagonal part */ 48138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 482da3a660dSBarry Smith /* send it on its way */ 483416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 48464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 485da3a660dSBarry Smith /* do local part */ 48638597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 487da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 488da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 489da3a660dSBarry Smith /* but is not perhaps always true. */ 490416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 49164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 492da3a660dSBarry Smith return 0; 493da3a660dSBarry Smith } 494da3a660dSBarry Smith 495416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 496da3a660dSBarry Smith { 497416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 498da3a660dSBarry Smith int ierr; 499da3a660dSBarry Smith 500da3a660dSBarry Smith /* do nondiagonal part */ 50138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 502da3a660dSBarry Smith /* send it on its way */ 503416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 50464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 505da3a660dSBarry Smith /* do local part */ 50638597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 507da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 508da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 509da3a660dSBarry Smith /* but is not perhaps always true. */ 510416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 51164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 512da3a660dSBarry Smith return 0; 513da3a660dSBarry Smith } 514da3a660dSBarry Smith 5151eb62cbbSBarry Smith /* 5161eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5171eb62cbbSBarry Smith diagonal block 5181eb62cbbSBarry Smith */ 519416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5201eb62cbbSBarry Smith { 521416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 52244cd7ae7SLois Curfman McInnes if (a->M != a->N) 52344cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 5245baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 5255baf8537SBarry Smith SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 526416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5271eb62cbbSBarry Smith } 5281eb62cbbSBarry Smith 529052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 530052efed2SBarry Smith { 531052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 532052efed2SBarry Smith int ierr; 533052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 534052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 535052efed2SBarry Smith return 0; 536052efed2SBarry Smith } 537052efed2SBarry Smith 53844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5391eb62cbbSBarry Smith { 5401eb62cbbSBarry Smith Mat mat = (Mat) obj; 54144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5421eb62cbbSBarry Smith int ierr; 543a5a9c739SBarry Smith #if defined(PETSC_LOG) 5446e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 545a5a9c739SBarry Smith #endif 5460452661fSBarry Smith PetscFree(aij->rowners); 54778b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 54878b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5490452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5500452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5511eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 552a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5537a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5540452661fSBarry Smith PetscFree(aij); 555a5a9c739SBarry Smith PLogObjectDestroy(mat); 5560452661fSBarry Smith PetscHeaderDestroy(mat); 5571eb62cbbSBarry Smith return 0; 5581eb62cbbSBarry Smith } 5597857610eSBarry Smith #include "draw.h" 560b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 561ee50ffe9SBarry Smith 562416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5631eb62cbbSBarry Smith { 564416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 565416022c9SBarry Smith int ierr; 566416022c9SBarry Smith 56717699dbbSLois Curfman McInnes if (aij->size == 1) { 568416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 569416022c9SBarry Smith } 570a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 571416022c9SBarry Smith return 0; 572416022c9SBarry Smith } 573416022c9SBarry Smith 574d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 575416022c9SBarry Smith { 57644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 577dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 578a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 579d636dbe3SBarry Smith FILE *fd; 58019bcc07fSBarry Smith ViewerType vtype; 581416022c9SBarry Smith 58219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 58319bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 58490ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 58590ace30eSBarry Smith if (format == ASCII_FORMAT_INFO_DETAILED) { 58695e01e2fSLois Curfman McInnes int nz, nzalloc, mem, flg; 587a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 58890ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 589a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 59095e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 59177c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 59295e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 59395e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59495e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 59595e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59608480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 59795e01e2fSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 59808480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 59995e01e2fSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 600a56f8943SBarry Smith fflush(fd); 60177c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 602a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 603416022c9SBarry Smith return 0; 604416022c9SBarry Smith } 60590ace30eSBarry Smith else if (format == ASCII_FORMAT_INFO) { 60608480c60SBarry Smith return 0; 60708480c60SBarry Smith } 608416022c9SBarry Smith } 609416022c9SBarry Smith 61019bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 61119bcc07fSBarry Smith Draw draw; 61219bcc07fSBarry Smith PetscTruth isnull; 61319bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 61419bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 61519bcc07fSBarry Smith } 61619bcc07fSBarry Smith 61719bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 61890ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 61977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 620d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 62117699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6221eb62cbbSBarry Smith aij->cend); 62378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 62478b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 625d13ab20cSBarry Smith fflush(fd); 62677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 627d13ab20cSBarry Smith } 628416022c9SBarry Smith else { 629a56f8943SBarry Smith int size = aij->size; 630a56f8943SBarry Smith rank = aij->rank; 63117699dbbSLois Curfman McInnes if (size == 1) { 63278b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 63395373324SBarry Smith } 63495373324SBarry Smith else { 63595373324SBarry Smith /* assemble the entire matrix onto first processor. */ 63695373324SBarry Smith Mat A; 637ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6382eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 63995373324SBarry Smith Scalar *a; 6402ee70a88SLois Curfman McInnes 64117699dbbSLois Curfman McInnes if (!rank) { 642b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 643c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64495373324SBarry Smith } 64595373324SBarry Smith else { 646b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 647c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64895373324SBarry Smith } 649464493b3SBarry Smith PLogObjectParent(mat,A); 650416022c9SBarry Smith 65195373324SBarry Smith /* copy over the A part */ 652ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6532ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 65495373324SBarry Smith row = aij->rstart; 655dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 65695373324SBarry Smith for ( i=0; i<m; i++ ) { 657416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 65895373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 65995373324SBarry Smith } 6602ee70a88SLois Curfman McInnes aj = Aloc->j; 661dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 66295373324SBarry Smith 66395373324SBarry Smith /* copy over the B part */ 664ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6652ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66695373324SBarry Smith row = aij->rstart; 6670452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 668dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 66995373324SBarry Smith for ( i=0; i<m; i++ ) { 670416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 67195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 67295373324SBarry Smith } 6730452661fSBarry Smith PetscFree(ct); 6746d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6756d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 67617699dbbSLois Curfman McInnes if (!rank) { 67778b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 67895373324SBarry Smith } 67978b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 68095373324SBarry Smith } 68195373324SBarry Smith } 6821eb62cbbSBarry Smith return 0; 6831eb62cbbSBarry Smith } 6841eb62cbbSBarry Smith 685416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 686416022c9SBarry Smith { 687416022c9SBarry Smith Mat mat = (Mat) obj; 688416022c9SBarry Smith int ierr; 68919bcc07fSBarry Smith ViewerType vtype; 690416022c9SBarry Smith 69119bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 69219bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 69319bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 694d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 695416022c9SBarry Smith } 69619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 697416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 698416022c9SBarry Smith } 699416022c9SBarry Smith return 0; 700416022c9SBarry Smith } 701416022c9SBarry Smith 7021eb62cbbSBarry Smith /* 7031eb62cbbSBarry Smith This has to provide several versions. 7041eb62cbbSBarry Smith 7051eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7061eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 707d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7081eb62cbbSBarry Smith */ 709fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 710dbb450caSBarry Smith double fshift,int its,Vec xx) 7118a729477SBarry Smith { 71244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 713d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 714ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 715c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7166abc6512SBarry Smith int ierr,*idx, *diag; 717416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7188a729477SBarry Smith 719d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 720dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 721dbb450caSBarry Smith ls = ls + shift; 722ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 723d6dfbf8fSBarry Smith diag = A->diag; 724c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 725da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 72638597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 727da3a660dSBarry Smith } 72864119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72978b31e54SBarry Smith CHKERRQ(ierr); 73064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 73178b31e54SBarry Smith CHKERRQ(ierr); 732d6dfbf8fSBarry Smith while (its--) { 733d6dfbf8fSBarry Smith /* go down through the rows */ 734d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 735d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 736dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 737dbb450caSBarry Smith v = A->a + A->i[i] + shift; 738d6dfbf8fSBarry Smith sum = b[i]; 739d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 740dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 741d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 742dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 743dbb450caSBarry Smith v = B->a + B->i[i] + shift; 744d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 745d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 746d6dfbf8fSBarry Smith } 747d6dfbf8fSBarry Smith /* come up through the rows */ 748d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 749d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 750dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 751dbb450caSBarry Smith v = A->a + A->i[i] + shift; 752d6dfbf8fSBarry Smith sum = b[i]; 753d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 754dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 755d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 756dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 757dbb450caSBarry Smith v = B->a + B->i[i] + shift; 758d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 759d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 760d6dfbf8fSBarry Smith } 761d6dfbf8fSBarry Smith } 762d6dfbf8fSBarry Smith } 763d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 764da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 76538597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 766da3a660dSBarry Smith } 76764119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76878b31e54SBarry Smith CHKERRQ(ierr); 76964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 77078b31e54SBarry Smith CHKERRQ(ierr); 771d6dfbf8fSBarry Smith while (its--) { 772d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 773d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 774dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 775dbb450caSBarry Smith v = A->a + A->i[i] + shift; 776d6dfbf8fSBarry Smith sum = b[i]; 777d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 778dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 779d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 780dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 781dbb450caSBarry Smith v = B->a + B->i[i] + shift; 782d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 783d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 784d6dfbf8fSBarry Smith } 785d6dfbf8fSBarry Smith } 786d6dfbf8fSBarry Smith } 787d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 788da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 78938597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 790da3a660dSBarry Smith } 79164119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 79278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 79364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 79478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 795d6dfbf8fSBarry Smith while (its--) { 796d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 797d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 798dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 799dbb450caSBarry Smith v = A->a + A->i[i] + shift; 800d6dfbf8fSBarry Smith sum = b[i]; 801d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 802dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 803d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 804dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 805dbb450caSBarry Smith v = B->a + B->i[i] + shift; 806d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 807d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 808d6dfbf8fSBarry Smith } 809d6dfbf8fSBarry Smith } 810d6dfbf8fSBarry Smith } 811c16cb8f2SBarry Smith else { 812c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 813c16cb8f2SBarry Smith } 8148a729477SBarry Smith return 0; 8158a729477SBarry Smith } 816a66be287SLois Curfman McInnes 817fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 818fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 819a66be287SLois Curfman McInnes { 820a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 821a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 822a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 823a66be287SLois Curfman McInnes 82478b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 82578b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 826a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 827a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 828bcd2baecSBarry Smith if (nz) *nz = isend[0]; 829bcd2baecSBarry Smith if (nzalloc) *nzalloc = isend[1]; 830bcd2baecSBarry Smith if (mem) *mem = isend[2]; 831a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 832d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 833bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 834bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 835bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 836a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 837d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 838bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 839bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 840bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 841a66be287SLois Curfman McInnes } 842a66be287SLois Curfman McInnes return 0; 843a66be287SLois Curfman McInnes } 844a66be287SLois Curfman McInnes 845299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 846299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 847299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 848299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 849299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 850299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 851299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 852299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 853299609e3SLois Curfman McInnes 854416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 855c74985f6SBarry Smith { 856c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 857c74985f6SBarry Smith 8586d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 8596d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 8606d4a8577SBarry Smith op == MAT_COLUMNS_SORTED || 8616d4a8577SBarry Smith op == MAT_ROW_ORIENTED) { 862c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 863c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 864c74985f6SBarry Smith } 8656d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 8666d4a8577SBarry Smith op == MAT_SYMMETRIC || 8676d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 8686d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 86994a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 8706d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 8714b0e389bSBarry Smith a->roworiented = 0; 8724b0e389bSBarry Smith MatSetOption(a->A,op); 8734b0e389bSBarry Smith MatSetOption(a->B,op); 8744b0e389bSBarry Smith } 8756d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 8766d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 877c0bbcb79SLois Curfman McInnes else 8784d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 879c74985f6SBarry Smith return 0; 880c74985f6SBarry Smith } 881c74985f6SBarry Smith 882d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 883c74985f6SBarry Smith { 88444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 885c74985f6SBarry Smith *m = mat->M; *n = mat->N; 886c74985f6SBarry Smith return 0; 887c74985f6SBarry Smith } 888c74985f6SBarry Smith 889d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 890c74985f6SBarry Smith { 89144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 892b7c46309SBarry Smith *m = mat->m; *n = mat->N; 893c74985f6SBarry Smith return 0; 894c74985f6SBarry Smith } 895c74985f6SBarry Smith 896d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 897c74985f6SBarry Smith { 89844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 899c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 900c74985f6SBarry Smith return 0; 901c74985f6SBarry Smith } 902c74985f6SBarry Smith 9036d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9046d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9056d84be18SBarry Smith 9066d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 90739e00950SLois Curfman McInnes { 908154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 90970f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 910154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 911154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 91270f0671dSBarry Smith int *cmap, *idx_p; 91339e00950SLois Curfman McInnes 9147a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 9157a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 9167a0afa10SBarry Smith 91770f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 9187a0afa10SBarry Smith /* 9197a0afa10SBarry Smith allocate enough space to hold information from the longest row. 9207a0afa10SBarry Smith */ 9217a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 922c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 923c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 9247a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 9257a0afa10SBarry Smith if (max < tmp) { max = tmp; } 9267a0afa10SBarry Smith } 9277a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 9287a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 9297a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 9307a0afa10SBarry Smith } 9317a0afa10SBarry Smith 932b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 933abc0e9e4SLois Curfman McInnes lrow = row - rstart; 93439e00950SLois Curfman McInnes 935154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 936154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 937154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 93838597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 93938597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 940154123eaSLois Curfman McInnes nztot = nzA + nzB; 941154123eaSLois Curfman McInnes 94270f0671dSBarry Smith cmap = mat->garray; 943154123eaSLois Curfman McInnes if (v || idx) { 944154123eaSLois Curfman McInnes if (nztot) { 945154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 94670f0671dSBarry Smith int imark = -1; 947154123eaSLois Curfman McInnes if (v) { 94870f0671dSBarry Smith *v = v_p = mat->rowvalues; 94939e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 95070f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 951154123eaSLois Curfman McInnes else break; 952154123eaSLois Curfman McInnes } 953154123eaSLois Curfman McInnes imark = i; 95470f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 95570f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 956154123eaSLois Curfman McInnes } 957154123eaSLois Curfman McInnes if (idx) { 95870f0671dSBarry Smith *idx = idx_p = mat->rowindices; 95970f0671dSBarry Smith if (imark > -1) { 96070f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 96170f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 96270f0671dSBarry Smith } 96370f0671dSBarry Smith } else { 964154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 96570f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 966154123eaSLois Curfman McInnes else break; 967154123eaSLois Curfman McInnes } 968154123eaSLois Curfman McInnes imark = i; 96970f0671dSBarry Smith } 97070f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 97170f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 97239e00950SLois Curfman McInnes } 97339e00950SLois Curfman McInnes } 9741ca473b0SSatish Balay else { 9751ca473b0SSatish Balay if (idx) *idx = 0; 9761ca473b0SSatish Balay if (v) *v = 0; 9771ca473b0SSatish Balay } 978154123eaSLois Curfman McInnes } 97939e00950SLois Curfman McInnes *nz = nztot; 98038597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 98138597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 98239e00950SLois Curfman McInnes return 0; 98339e00950SLois Curfman McInnes } 98439e00950SLois Curfman McInnes 9856d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 98639e00950SLois Curfman McInnes { 9877a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 9887a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 9897a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 9907a0afa10SBarry Smith } 9917a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 99239e00950SLois Curfman McInnes return 0; 99339e00950SLois Curfman McInnes } 99439e00950SLois Curfman McInnes 995cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 996855ac2c5SLois Curfman McInnes { 997855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 998ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 999416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1000416022c9SBarry Smith double sum = 0.0; 100104ca555eSLois Curfman McInnes Scalar *v; 100204ca555eSLois Curfman McInnes 100317699dbbSLois Curfman McInnes if (aij->size == 1) { 100414183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 100537fa93a5SLois Curfman McInnes } else { 100604ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 100704ca555eSLois Curfman McInnes v = amat->a; 100804ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 100904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 101004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 101104ca555eSLois Curfman McInnes #else 101204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 101304ca555eSLois Curfman McInnes #endif 101404ca555eSLois Curfman McInnes } 101504ca555eSLois Curfman McInnes v = bmat->a; 101604ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 101704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 101804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 101904ca555eSLois Curfman McInnes #else 102004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 102104ca555eSLois Curfman McInnes #endif 102204ca555eSLois Curfman McInnes } 10236d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 102404ca555eSLois Curfman McInnes *norm = sqrt(*norm); 102504ca555eSLois Curfman McInnes } 102604ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 102704ca555eSLois Curfman McInnes double *tmp, *tmp2; 102804ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 10290452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 10300452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1031cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 103204ca555eSLois Curfman McInnes *norm = 0.0; 103304ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 103404ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1035579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 103604ca555eSLois Curfman McInnes } 103704ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 103804ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1039579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 104004ca555eSLois Curfman McInnes } 10416d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 104204ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 104304ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 104404ca555eSLois Curfman McInnes } 10450452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 104604ca555eSLois Curfman McInnes } 104704ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1048515d9167SLois Curfman McInnes double ntemp = 0.0; 104904ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1050dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 105104ca555eSLois Curfman McInnes sum = 0.0; 105204ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1053cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 105404ca555eSLois Curfman McInnes } 1055dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 105604ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1057cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 105804ca555eSLois Curfman McInnes } 1059515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 106004ca555eSLois Curfman McInnes } 10616d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 106204ca555eSLois Curfman McInnes } 106304ca555eSLois Curfman McInnes else { 1064515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 106504ca555eSLois Curfman McInnes } 106637fa93a5SLois Curfman McInnes } 1067855ac2c5SLois Curfman McInnes return 0; 1068855ac2c5SLois Curfman McInnes } 1069855ac2c5SLois Curfman McInnes 10700de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1071b7c46309SBarry Smith { 1072b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1073dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1074416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1075416022c9SBarry Smith Mat B; 1076b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1077b7c46309SBarry Smith Scalar *array; 1078b7c46309SBarry Smith 10793638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 10803638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1081b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1082b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1083b7c46309SBarry Smith 1084b7c46309SBarry Smith /* copy over the A part */ 1085ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1086b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1087b7c46309SBarry Smith row = a->rstart; 1088dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1089b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1090416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1091b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1092b7c46309SBarry Smith } 1093b7c46309SBarry Smith aj = Aloc->j; 10944af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1095b7c46309SBarry Smith 1096b7c46309SBarry Smith /* copy over the B part */ 1097ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1098b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1099b7c46309SBarry Smith row = a->rstart; 11000452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1101dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1102b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1103416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1104b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1105b7c46309SBarry Smith } 11060452661fSBarry Smith PetscFree(ct); 11076d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11086d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11093638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 11100de55854SLois Curfman McInnes *matout = B; 11110de55854SLois Curfman McInnes } else { 11120de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 11130452661fSBarry Smith PetscFree(a->rowners); 11140de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 11150de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 11160452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 11170452661fSBarry Smith if (a->garray) PetscFree(a->garray); 11180de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1119a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 11200452661fSBarry Smith PetscFree(a); 1121416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 11220452661fSBarry Smith PetscHeaderDestroy(B); 11230de55854SLois Curfman McInnes } 1124b7c46309SBarry Smith return 0; 1125b7c46309SBarry Smith } 1126b7c46309SBarry Smith 1127682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1128682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1129682d7d0cSBarry Smith { 1130682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1131682d7d0cSBarry Smith 1132682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1133682d7d0cSBarry Smith else return 0; 1134682d7d0cSBarry Smith } 1135682d7d0cSBarry Smith 11365a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 11375a838052SSatish Balay { 11385a838052SSatish Balay *bs = 1; 11395a838052SSatish Balay return 0; 11405a838052SSatish Balay } 11415a838052SSatish Balay 1142fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 11433d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 11442f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1145598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 11468a729477SBarry Smith /* -------------------------------------------------------------------*/ 11472ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 114839e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 114944a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 115044a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1151299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1152299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1153299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 115444a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1155b7c46309SBarry Smith MatTranspose_MPIAIJ, 1156a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1157855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1158ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11591eb62cbbSBarry Smith 0, 1160299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1161299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1162299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1163d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1164299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 11653e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1166b49de8d1SLois Curfman McInnes 0,0,0, 1167598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1168052efed2SBarry Smith MatPrintHelp_MPIAIJ, 11695a838052SSatish Balay MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ}; 11708a729477SBarry Smith 11711987afe7SBarry Smith /*@C 1172ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 11733a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 11743a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 11753a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 11763a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 11778a729477SBarry Smith 11788a729477SBarry Smith Input Parameters: 11791eb62cbbSBarry Smith . comm - MPI communicator 11807d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 118192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 118392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 118492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 11867d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 118792e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1188ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1189ff756334SLois Curfman McInnes (same for all local rows) 11902bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 119192e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 11922bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 11932bd5e0b2SLois Curfman McInnes it is zero. 11942bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1195ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 11962bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 11972bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 11982bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 11998a729477SBarry Smith 1200ff756334SLois Curfman McInnes Output Parameter: 120144cd7ae7SLois Curfman McInnes . A - the matrix 12028a729477SBarry Smith 1203ff756334SLois Curfman McInnes Notes: 1204ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1205ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12060002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12070002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1208ff756334SLois Curfman McInnes 1209ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1210ff756334SLois Curfman McInnes (possibly both). 1211ff756334SLois Curfman McInnes 12125511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 12135511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 12145511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 12155511cfe3SLois Curfman McInnes 12165511cfe3SLois Curfman McInnes Options Database Keys: 12175511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 12186e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 12196e62573dSLois Curfman McInnes $ (max limit=5) 12206e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 12216e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 12226e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 12235511cfe3SLois Curfman McInnes 1224e0245417SLois Curfman McInnes Storage Information: 1225e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1226e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1227e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1228e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1229e0245417SLois Curfman McInnes 1230e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 12315ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 12325ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 12335ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 12345ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1235ff756334SLois Curfman McInnes 12365511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 12375511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 12382191d07cSBarry Smith 1239b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1240b810aeb4SBarry Smith $ ------------------- 12415511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 12425511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 12435511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 12445511cfe3SLois Curfman McInnes $ ------------------- 1245b810aeb4SBarry Smith $ 1246b810aeb4SBarry Smith 12475511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 12485511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 12495511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 12505511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 12515511cfe3SLois Curfman McInnes 12525511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 12535511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 12545511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 12553d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 125692e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 12573d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 12583a511b96SLois Curfman McInnes 1259dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1260ff756334SLois Curfman McInnes 1261fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 12628a729477SBarry Smith @*/ 12631eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 126444cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 12658a729477SBarry Smith { 126644cd7ae7SLois Curfman McInnes Mat B; 126744cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 12686abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1269416022c9SBarry Smith 127044cd7ae7SLois Curfman McInnes *A = 0; 127144cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 127244cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 127344cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 127444cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 127544cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 127644cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 127744cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 127844cd7ae7SLois Curfman McInnes B->factor = 0; 127944cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 1280d6dfbf8fSBarry Smith 128144cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 128244cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 128344cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 12841eb62cbbSBarry Smith 1285b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 12862e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 12871987afe7SBarry Smith 1288dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 12891eb62cbbSBarry Smith work[0] = m; work[1] = n; 1290d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1291dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1292dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 12931eb62cbbSBarry Smith } 129444cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 129544cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 129644cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 129744cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 129844cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 129944cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 13001eb62cbbSBarry Smith 13011eb62cbbSBarry Smith /* build local table of row and column ownerships */ 130244cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 130344cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1304603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 130544cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 130644cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 130744cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 130844cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 13098a729477SBarry Smith } 131044cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 131144cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 131244cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 131344cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 131444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 131544cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 13168a729477SBarry Smith } 131744cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 131844cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 13198a729477SBarry Smith 13205ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 132144cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 132244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 13237b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 132444cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 132544cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 13268a729477SBarry Smith 13271eb62cbbSBarry Smith /* build cache for off array entries formed */ 132844cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 132944cd7ae7SLois Curfman McInnes b->colmap = 0; 133044cd7ae7SLois Curfman McInnes b->garray = 0; 133144cd7ae7SLois Curfman McInnes b->roworiented = 1; 13328a729477SBarry Smith 13331eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 133444cd7ae7SLois Curfman McInnes b->lvec = 0; 133544cd7ae7SLois Curfman McInnes b->Mvctx = 0; 13368a729477SBarry Smith 13377a0afa10SBarry Smith /* stuff for MatGetRow() */ 133844cd7ae7SLois Curfman McInnes b->rowindices = 0; 133944cd7ae7SLois Curfman McInnes b->rowvalues = 0; 134044cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 13417a0afa10SBarry Smith 134244cd7ae7SLois Curfman McInnes *A = B; 1343d6dfbf8fSBarry Smith return 0; 1344d6dfbf8fSBarry Smith } 1345c74985f6SBarry Smith 13463d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1347d6dfbf8fSBarry Smith { 1348d6dfbf8fSBarry Smith Mat mat; 1349416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1350a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1351d6dfbf8fSBarry Smith 1352416022c9SBarry Smith *newmat = 0; 13530452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1354a5a9c739SBarry Smith PLogObjectCreate(mat); 13550452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1356416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 135744a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 135844a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1359d6dfbf8fSBarry Smith mat->factor = matin->factor; 1360c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1361d6dfbf8fSBarry Smith 136244cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 136344cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 136444cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 136544cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1366d6dfbf8fSBarry Smith 1367416022c9SBarry Smith a->rstart = oldmat->rstart; 1368416022c9SBarry Smith a->rend = oldmat->rend; 1369416022c9SBarry Smith a->cstart = oldmat->cstart; 1370416022c9SBarry Smith a->cend = oldmat->cend; 137117699dbbSLois Curfman McInnes a->size = oldmat->size; 137217699dbbSLois Curfman McInnes a->rank = oldmat->rank; 137364119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1374bcd2baecSBarry Smith a->rowvalues = 0; 1375bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1376d6dfbf8fSBarry Smith 1377603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1378603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1379603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1380603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1381416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 13822ee70a88SLois Curfman McInnes if (oldmat->colmap) { 13830452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1384416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1385416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1386416022c9SBarry Smith } else a->colmap = 0; 1387*3f41c07dSBarry Smith if (oldmat->garray) { 1388*3f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1389*3f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1390464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1391*3f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1392416022c9SBarry Smith } else a->garray = 0; 1393d6dfbf8fSBarry Smith 1394416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1395416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1396a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1397416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1398416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1399416022c9SBarry Smith PLogObjectParent(mat,a->A); 1400416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1401416022c9SBarry Smith PLogObjectParent(mat,a->B); 14025dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 140325cdf11fSBarry Smith if (flg) { 1404682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1405682d7d0cSBarry Smith } 14068a729477SBarry Smith *newmat = mat; 14078a729477SBarry Smith return 0; 14088a729477SBarry Smith } 1409416022c9SBarry Smith 141077c4ece6SBarry Smith #include "sys.h" 1411416022c9SBarry Smith 141219bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1413416022c9SBarry Smith { 1414d65a2f8fSBarry Smith Mat A; 1415416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1416d65a2f8fSBarry Smith Scalar *vals,*svals; 141719bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1418416022c9SBarry Smith MPI_Status status; 141917699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1420d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 142119bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1422416022c9SBarry Smith 142317699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 142417699dbbSLois Curfman McInnes if (!rank) { 142590ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 142677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1427c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1428416022c9SBarry Smith } 1429416022c9SBarry Smith 1430416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1431416022c9SBarry Smith M = header[1]; N = header[2]; 1432416022c9SBarry Smith /* determine ownership of all rows */ 143317699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 14340452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1435416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1436416022c9SBarry Smith rowners[0] = 0; 143717699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1438416022c9SBarry Smith rowners[i] += rowners[i-1]; 1439416022c9SBarry Smith } 144017699dbbSLois Curfman McInnes rstart = rowners[rank]; 144117699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1442416022c9SBarry Smith 1443416022c9SBarry Smith /* distribute row lengths to all processors */ 14440452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1445416022c9SBarry Smith offlens = ourlens + (rend-rstart); 144617699dbbSLois Curfman McInnes if (!rank) { 14470452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 144877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 14490452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 145017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1451d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 14520452661fSBarry Smith PetscFree(sndcounts); 1453416022c9SBarry Smith } 1454416022c9SBarry Smith else { 1455416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1456416022c9SBarry Smith } 1457416022c9SBarry Smith 145817699dbbSLois Curfman McInnes if (!rank) { 1459416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 14600452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1461cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 146217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1463416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1464416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1465416022c9SBarry Smith } 1466416022c9SBarry Smith } 14670452661fSBarry Smith PetscFree(rowlengths); 1468416022c9SBarry Smith 1469416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1470416022c9SBarry Smith maxnz = 0; 147117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 14720452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1473416022c9SBarry Smith } 14740452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1475416022c9SBarry Smith 1476416022c9SBarry Smith /* read in my part of the matrix column indices */ 1477416022c9SBarry Smith nz = procsnz[0]; 14780452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 147977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1480d65a2f8fSBarry Smith 1481d65a2f8fSBarry Smith /* read in every one elses and ship off */ 148217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1483d65a2f8fSBarry Smith nz = procsnz[i]; 148477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1485d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1486d65a2f8fSBarry Smith } 14870452661fSBarry Smith PetscFree(cols); 1488416022c9SBarry Smith } 1489416022c9SBarry Smith else { 1490416022c9SBarry Smith /* determine buffer space needed for message */ 1491416022c9SBarry Smith nz = 0; 1492416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1493416022c9SBarry Smith nz += ourlens[i]; 1494416022c9SBarry Smith } 14950452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1496416022c9SBarry Smith 1497416022c9SBarry Smith /* receive message of column indices*/ 1498d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1499416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1500c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1501416022c9SBarry Smith } 1502416022c9SBarry Smith 1503416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1504cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1505416022c9SBarry Smith jj = 0; 1506416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1507416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1508d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1509416022c9SBarry Smith jj++; 1510416022c9SBarry Smith } 1511416022c9SBarry Smith } 1512d65a2f8fSBarry Smith 1513d65a2f8fSBarry Smith /* create our matrix */ 1514416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1515416022c9SBarry Smith ourlens[i] -= offlens[i]; 1516416022c9SBarry Smith } 1517d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1518d65a2f8fSBarry Smith A = *newmat; 15196d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1520d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1521d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1522d65a2f8fSBarry Smith } 1523416022c9SBarry Smith 152417699dbbSLois Curfman McInnes if (!rank) { 15250452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1526416022c9SBarry Smith 1527416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1528416022c9SBarry Smith nz = procsnz[0]; 152977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1530d65a2f8fSBarry Smith 1531d65a2f8fSBarry Smith /* insert into matrix */ 1532d65a2f8fSBarry Smith jj = rstart; 1533d65a2f8fSBarry Smith smycols = mycols; 1534d65a2f8fSBarry Smith svals = vals; 1535d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1536d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1537d65a2f8fSBarry Smith smycols += ourlens[i]; 1538d65a2f8fSBarry Smith svals += ourlens[i]; 1539d65a2f8fSBarry Smith jj++; 1540416022c9SBarry Smith } 1541416022c9SBarry Smith 1542d65a2f8fSBarry Smith /* read in other processors and ship out */ 154317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1544416022c9SBarry Smith nz = procsnz[i]; 154577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1546d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1547416022c9SBarry Smith } 15480452661fSBarry Smith PetscFree(procsnz); 1549416022c9SBarry Smith } 1550d65a2f8fSBarry Smith else { 1551d65a2f8fSBarry Smith /* receive numeric values */ 15520452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1553416022c9SBarry Smith 1554d65a2f8fSBarry Smith /* receive message of values*/ 1555d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1556d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1557c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1558d65a2f8fSBarry Smith 1559d65a2f8fSBarry Smith /* insert into matrix */ 1560d65a2f8fSBarry Smith jj = rstart; 1561d65a2f8fSBarry Smith smycols = mycols; 1562d65a2f8fSBarry Smith svals = vals; 1563d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1564d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1565d65a2f8fSBarry Smith smycols += ourlens[i]; 1566d65a2f8fSBarry Smith svals += ourlens[i]; 1567d65a2f8fSBarry Smith jj++; 1568d65a2f8fSBarry Smith } 1569d65a2f8fSBarry Smith } 15700452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1571d65a2f8fSBarry Smith 15726d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15736d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1574416022c9SBarry Smith return 0; 1575416022c9SBarry Smith } 1576