1905e6a2fSBarry Smith 2cb512458SBarry Smith #ifndef lint 3*70f55243SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.159 1996/08/06 16:51:19 balay Exp bsmith $"; 4cb512458SBarry Smith #endif 58a729477SBarry Smith 6*70f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 8d9942c19SSatish Balay #include "src/inline/spops.h" 98a729477SBarry Smith 109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 129e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 139e25ed09SBarry Smith length of colmap equals the global matrix length. 149e25ed09SBarry Smith */ 15905e6a2fSBarry Smith static int CreateColmap_MPIAIJ_Private(Mat mat) 169e25ed09SBarry Smith { 1744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 18ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 19905e6a2fSBarry Smith int n = B->n,i; 20dbb450caSBarry Smith 210452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 22464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 23cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 24905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 259e25ed09SBarry Smith return 0; 269e25ed09SBarry Smith } 279e25ed09SBarry Smith 282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 292493cbb0SBarry Smith 3070423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 3417699dbbSLois Curfman McInnes if (aij->size == 1) { 3551c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3648d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 37299609e3SLois Curfman McInnes return 0; 38299609e3SLois Curfman McInnes } 39299609e3SLois Curfman McInnes 404b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 418a729477SBarry Smith { 4244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 46905e6a2fSBarry Smith int roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67905e6a2fSBarry Smith if (!aij->colmap) { 68905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 69905e6a2fSBarry Smith } 70905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 71ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 722493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 734b0e389bSBarry Smith col = in[j]; 74d6dfbf8fSBarry Smith } 759e25ed09SBarry Smith } 764b0e389bSBarry Smith else col = in[j]; 774b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 784b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith } 811eb62cbbSBarry Smith } 821eb62cbbSBarry Smith else { 834b0e389bSBarry Smith if (roworiented) { 844b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 854b0e389bSBarry Smith } 864b0e389bSBarry Smith else { 874b0e389bSBarry Smith row = im[i]; 884b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 894b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 904b0e389bSBarry Smith } 914b0e389bSBarry Smith } 921eb62cbbSBarry Smith } 938a729477SBarry Smith } 948a729477SBarry Smith return 0; 958a729477SBarry Smith } 968a729477SBarry Smith 97b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 98b49de8d1SLois Curfman McInnes { 99b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 100b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 101b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116905e6a2fSBarry Smith if (!aij->colmap) { 117905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 118905e6a2fSBarry Smith } 119905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 120e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 121d9d09a02SSatish Balay else { 122b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes } 126d9d09a02SSatish Balay } 127b49de8d1SLois Curfman McInnes else { 128b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 129b49de8d1SLois Curfman McInnes } 130b49de8d1SLois Curfman McInnes } 131b49de8d1SLois Curfman McInnes return 0; 132b49de8d1SLois Curfman McInnes } 133b49de8d1SLois Curfman McInnes 134fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1358a729477SBarry Smith { 13644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 137d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13817699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13917699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1401eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1416abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1421eb62cbbSBarry Smith InsertMode addv; 1431eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1441eb62cbbSBarry Smith 1451eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 146d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 147dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 148bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1511eb62cbbSBarry Smith 1521eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1530452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 154cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1550452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1561eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1571eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15817699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1591eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1601eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1618a729477SBarry Smith } 1628a729477SBarry Smith } 1638a729477SBarry Smith } 16417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1651eb62cbbSBarry Smith 1661eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1670452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16817699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16917699dbbSLois Curfman McInnes nreceives = work[rank]; 17017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 17117699dbbSLois Curfman McInnes nmax = work[rank]; 1720452661fSBarry Smith PetscFree(work); 1731eb62cbbSBarry Smith 1741eb62cbbSBarry Smith /* post receives: 1751eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1761eb62cbbSBarry Smith (global index,value) we store the global index as a double 177d6dfbf8fSBarry Smith to simplify the message passing. 1781eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1791eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1801eb62cbbSBarry Smith this is a lot of wasted space. 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith 1831eb62cbbSBarry Smith This could be done better. 1841eb62cbbSBarry Smith */ 1850452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18678b31e54SBarry Smith CHKPTRQ(rvalues); 1870452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18878b31e54SBarry Smith CHKPTRQ(recv_waits); 1891eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 190416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1911eb62cbbSBarry Smith comm,recv_waits+i); 1921eb62cbbSBarry Smith } 1931eb62cbbSBarry Smith 1941eb62cbbSBarry Smith /* do sends: 1951eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1961eb62cbbSBarry Smith the ith processor 1971eb62cbbSBarry Smith */ 1980452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1990452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 20078b31e54SBarry Smith CHKPTRQ(send_waits); 2010452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2021eb62cbbSBarry Smith starts[0] = 0; 20317699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2041eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2051eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2061eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2071eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2081eb62cbbSBarry Smith } 2090452661fSBarry Smith PetscFree(owner); 2101eb62cbbSBarry Smith starts[0] = 0; 21117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2121eb62cbbSBarry Smith count = 0; 21317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2141eb62cbbSBarry Smith if (procs[i]) { 215416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2161eb62cbbSBarry Smith comm,send_waits+count++); 2171eb62cbbSBarry Smith } 2181eb62cbbSBarry Smith } 2190452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2201eb62cbbSBarry Smith 2211eb62cbbSBarry Smith /* Free cache space */ 2222191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 22378b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2241eb62cbbSBarry Smith 2251eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2261eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2271eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2281eb62cbbSBarry Smith aij->rmax = nmax; 2291eb62cbbSBarry Smith 2301eb62cbbSBarry Smith return 0; 2311eb62cbbSBarry Smith } 23244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2331eb62cbbSBarry Smith 234fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2351eb62cbbSBarry Smith { 23644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2371eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 238416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 239905e6a2fSBarry Smith int row,col,other_disassembled; 2401eb62cbbSBarry Smith Scalar *values,val; 2411eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2421eb62cbbSBarry Smith 2431eb62cbbSBarry Smith /* wait on receives */ 2441eb62cbbSBarry Smith while (count) { 245d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2461eb62cbbSBarry Smith /* unpack receives into our local space */ 247d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 248c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2491eb62cbbSBarry Smith n = n/3; 2501eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 251227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 252227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2531eb62cbbSBarry Smith val = values[3*i+2]; 2541eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2551eb62cbbSBarry Smith col -= aij->cstart; 2561eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2571eb62cbbSBarry Smith } 2581eb62cbbSBarry Smith else { 259227d817aSBarry Smith if (mat->was_assembled) { 260905e6a2fSBarry Smith if (!aij->colmap) { 261905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262905e6a2fSBarry Smith } 263905e6a2fSBarry Smith col = aij->colmap[col] - 1; 264ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2652493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 266227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 267d6dfbf8fSBarry Smith } 2689e25ed09SBarry Smith } 2691eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith } 2721eb62cbbSBarry Smith count--; 2731eb62cbbSBarry Smith } 2740452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* wait on sends */ 2771eb62cbbSBarry Smith if (aij->nsends) { 2780452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27978b31e54SBarry Smith CHKPTRQ(send_status); 2801eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2810452661fSBarry Smith PetscFree(send_status); 2821eb62cbbSBarry Smith } 2830452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2841eb62cbbSBarry Smith 28564119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 28678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2881eb62cbbSBarry Smith 2892493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2902493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 291227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 292227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2932493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2942493cbb0SBarry Smith } 2952493cbb0SBarry Smith 2966d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 29778b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2985e42470aSBarry Smith } 29978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 30078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3015e42470aSBarry Smith 3027a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3038a729477SBarry Smith return 0; 3048a729477SBarry Smith } 3058a729477SBarry Smith 3062ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3071eb62cbbSBarry Smith { 30844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 309dbd7a890SLois Curfman McInnes int ierr; 31078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 31178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3121eb62cbbSBarry Smith return 0; 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith 3151eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3161eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3171eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3181eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3191eb62cbbSBarry Smith routine. 3201eb62cbbSBarry Smith */ 32144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3221eb62cbbSBarry Smith { 32344a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 32417699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3256abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 32617699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3275392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 328d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 329d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3301eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3311eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3321eb62cbbSBarry Smith IS istmp; 3331eb62cbbSBarry Smith 33477c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 33578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3361eb62cbbSBarry Smith 3371eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3380452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 339cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3400452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3411eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3421eb62cbbSBarry Smith idx = rows[i]; 3431eb62cbbSBarry Smith found = 0; 34417699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3451eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3461eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3471eb62cbbSBarry Smith } 3481eb62cbbSBarry Smith } 349bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3501eb62cbbSBarry Smith } 35117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3540452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 35517699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 35617699dbbSLois Curfman McInnes nrecvs = work[rank]; 35717699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35817699dbbSLois Curfman McInnes nmax = work[rank]; 3590452661fSBarry Smith PetscFree(work); 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith /* post receives: */ 3620452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 36378b31e54SBarry Smith CHKPTRQ(rvalues); 3640452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 36578b31e54SBarry Smith CHKPTRQ(recv_waits); 3661eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 367416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3681eb62cbbSBarry Smith } 3691eb62cbbSBarry Smith 3701eb62cbbSBarry Smith /* do sends: 3711eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3721eb62cbbSBarry Smith the ith processor 3731eb62cbbSBarry Smith */ 3740452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3750452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 37678b31e54SBarry Smith CHKPTRQ(send_waits); 3770452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3781eb62cbbSBarry Smith starts[0] = 0; 37917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3801eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3811eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3841eb62cbbSBarry Smith 3851eb62cbbSBarry Smith starts[0] = 0; 38617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3871eb62cbbSBarry Smith count = 0; 38817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3891eb62cbbSBarry Smith if (procs[i]) { 390416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3911eb62cbbSBarry Smith } 3921eb62cbbSBarry Smith } 3930452661fSBarry Smith PetscFree(starts); 3941eb62cbbSBarry Smith 39517699dbbSLois Curfman McInnes base = owners[rank]; 3961eb62cbbSBarry Smith 3971eb62cbbSBarry Smith /* wait on receives */ 3980452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3991eb62cbbSBarry Smith source = lens + nrecvs; 4001eb62cbbSBarry Smith count = nrecvs; slen = 0; 4011eb62cbbSBarry Smith while (count) { 402d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4031eb62cbbSBarry Smith /* unpack receives into our local space */ 4041eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 405d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 406d6dfbf8fSBarry Smith lens[imdex] = n; 4071eb62cbbSBarry Smith slen += n; 4081eb62cbbSBarry Smith count--; 4091eb62cbbSBarry Smith } 4100452661fSBarry Smith PetscFree(recv_waits); 4111eb62cbbSBarry Smith 4121eb62cbbSBarry Smith /* move the data into the send scatter */ 4130452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4141eb62cbbSBarry Smith count = 0; 4151eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4161eb62cbbSBarry Smith values = rvalues + i*nmax; 4171eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4181eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4191eb62cbbSBarry Smith } 4201eb62cbbSBarry Smith } 4210452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4220452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* actually zap the local rows */ 425416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 426464493b3SBarry Smith PLogObjectParent(A,istmp); 4270452661fSBarry Smith PetscFree(lrows); 42878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4311eb62cbbSBarry Smith 4321eb62cbbSBarry Smith /* wait on sends */ 4331eb62cbbSBarry Smith if (nsends) { 4340452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 43578b31e54SBarry Smith CHKPTRQ(send_status); 4361eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4370452661fSBarry Smith PetscFree(send_status); 4381eb62cbbSBarry Smith } 4390452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4401eb62cbbSBarry Smith 4411eb62cbbSBarry Smith return 0; 4421eb62cbbSBarry Smith } 4431eb62cbbSBarry Smith 444416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4451eb62cbbSBarry Smith { 446416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 447fbd6ef76SBarry Smith int ierr,nt; 448416022c9SBarry Smith 449a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 450fbd6ef76SBarry Smith if (nt != a->n) { 45147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 452fbd6ef76SBarry Smith } 453a2ce50c7SBarry Smith ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 454fbd6ef76SBarry Smith if (nt != a->m) { 45547b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy"); 456fbd6ef76SBarry Smith } 45764119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45838597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 45964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 46038597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4611eb62cbbSBarry Smith return 0; 4621eb62cbbSBarry Smith } 4631eb62cbbSBarry Smith 464416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 465da3a660dSBarry Smith { 466416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 467da3a660dSBarry Smith int ierr; 46864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46938597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 47064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 47138597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 472da3a660dSBarry Smith return 0; 473da3a660dSBarry Smith } 474da3a660dSBarry Smith 475416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 476da3a660dSBarry Smith { 477416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 478da3a660dSBarry Smith int ierr; 479da3a660dSBarry Smith 480da3a660dSBarry Smith /* do nondiagonal part */ 48138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 482da3a660dSBarry Smith /* send it on its way */ 483416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 48464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 485da3a660dSBarry Smith /* do local part */ 48638597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 487da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 488da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 489da3a660dSBarry Smith /* but is not perhaps always true. */ 490416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 49164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 492da3a660dSBarry Smith return 0; 493da3a660dSBarry Smith } 494da3a660dSBarry Smith 495416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 496da3a660dSBarry Smith { 497416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 498da3a660dSBarry Smith int ierr; 499da3a660dSBarry Smith 500da3a660dSBarry Smith /* do nondiagonal part */ 50138597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 502da3a660dSBarry Smith /* send it on its way */ 503416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 50464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 505da3a660dSBarry Smith /* do local part */ 50638597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 507da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 508da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 509da3a660dSBarry Smith /* but is not perhaps always true. */ 510416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 51164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 512da3a660dSBarry Smith return 0; 513da3a660dSBarry Smith } 514da3a660dSBarry Smith 5151eb62cbbSBarry Smith /* 5161eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5171eb62cbbSBarry Smith diagonal block 5181eb62cbbSBarry Smith */ 519416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5201eb62cbbSBarry Smith { 521416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 52244cd7ae7SLois Curfman McInnes if (a->M != a->N) 52344cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 524416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5251eb62cbbSBarry Smith } 5261eb62cbbSBarry Smith 527052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 528052efed2SBarry Smith { 529052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 530052efed2SBarry Smith int ierr; 531052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 532052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 533052efed2SBarry Smith return 0; 534052efed2SBarry Smith } 535052efed2SBarry Smith 53644a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5371eb62cbbSBarry Smith { 5381eb62cbbSBarry Smith Mat mat = (Mat) obj; 53944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5401eb62cbbSBarry Smith int ierr; 541a5a9c739SBarry Smith #if defined(PETSC_LOG) 5426e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 543a5a9c739SBarry Smith #endif 5440452661fSBarry Smith PetscFree(aij->rowners); 54578b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 54678b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5470452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5480452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5491eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 550a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5517a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5520452661fSBarry Smith PetscFree(aij); 553a5a9c739SBarry Smith PLogObjectDestroy(mat); 5540452661fSBarry Smith PetscHeaderDestroy(mat); 5551eb62cbbSBarry Smith return 0; 5561eb62cbbSBarry Smith } 5577857610eSBarry Smith #include "draw.h" 558b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 559ee50ffe9SBarry Smith 560416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5611eb62cbbSBarry Smith { 562416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 563416022c9SBarry Smith int ierr; 564416022c9SBarry Smith 56517699dbbSLois Curfman McInnes if (aij->size == 1) { 566416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 567416022c9SBarry Smith } 568a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 569416022c9SBarry Smith return 0; 570416022c9SBarry Smith } 571416022c9SBarry Smith 572d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 573416022c9SBarry Smith { 57444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 575dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 576a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 577d636dbe3SBarry Smith FILE *fd; 57819bcc07fSBarry Smith ViewerType vtype; 579416022c9SBarry Smith 58019bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 58119bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 58290ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 58390ace30eSBarry Smith if (format == ASCII_FORMAT_INFO_DETAILED) { 58495e01e2fSLois Curfman McInnes int nz, nzalloc, mem, flg; 585a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 58690ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 587a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 58895e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 58977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 59095e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 59195e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59295e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 59395e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59408480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 59595e01e2fSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 59608480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 59795e01e2fSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 598a56f8943SBarry Smith fflush(fd); 59977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 600a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 601416022c9SBarry Smith return 0; 602416022c9SBarry Smith } 60390ace30eSBarry Smith else if (format == ASCII_FORMAT_INFO) { 60408480c60SBarry Smith return 0; 60508480c60SBarry Smith } 606416022c9SBarry Smith } 607416022c9SBarry Smith 60819bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 60919bcc07fSBarry Smith Draw draw; 61019bcc07fSBarry Smith PetscTruth isnull; 61119bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 61219bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 61319bcc07fSBarry Smith } 61419bcc07fSBarry Smith 61519bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 61690ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 61777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 618d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 61917699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6201eb62cbbSBarry Smith aij->cend); 62178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 62278b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 623d13ab20cSBarry Smith fflush(fd); 62477c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 625d13ab20cSBarry Smith } 626416022c9SBarry Smith else { 627a56f8943SBarry Smith int size = aij->size; 628a56f8943SBarry Smith rank = aij->rank; 62917699dbbSLois Curfman McInnes if (size == 1) { 63078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 63195373324SBarry Smith } 63295373324SBarry Smith else { 63395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 63495373324SBarry Smith Mat A; 635ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6362eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 63795373324SBarry Smith Scalar *a; 6382ee70a88SLois Curfman McInnes 63917699dbbSLois Curfman McInnes if (!rank) { 640b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 641c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64295373324SBarry Smith } 64395373324SBarry Smith else { 644b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 645c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64695373324SBarry Smith } 647464493b3SBarry Smith PLogObjectParent(mat,A); 648416022c9SBarry Smith 64995373324SBarry Smith /* copy over the A part */ 650ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6512ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 65295373324SBarry Smith row = aij->rstart; 653dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 65495373324SBarry Smith for ( i=0; i<m; i++ ) { 655416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 65695373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 65795373324SBarry Smith } 6582ee70a88SLois Curfman McInnes aj = Aloc->j; 659dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 66095373324SBarry Smith 66195373324SBarry Smith /* copy over the B part */ 662ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6632ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66495373324SBarry Smith row = aij->rstart; 6650452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 666dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 66795373324SBarry Smith for ( i=0; i<m; i++ ) { 668416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 66995373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 67095373324SBarry Smith } 6710452661fSBarry Smith PetscFree(ct); 6726d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6736d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 67417699dbbSLois Curfman McInnes if (!rank) { 67578b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 67695373324SBarry Smith } 67778b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 67895373324SBarry Smith } 67995373324SBarry Smith } 6801eb62cbbSBarry Smith return 0; 6811eb62cbbSBarry Smith } 6821eb62cbbSBarry Smith 683416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 684416022c9SBarry Smith { 685416022c9SBarry Smith Mat mat = (Mat) obj; 686416022c9SBarry Smith int ierr; 68719bcc07fSBarry Smith ViewerType vtype; 688416022c9SBarry Smith 68919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 69019bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 69119bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 692d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 693416022c9SBarry Smith } 69419bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 695416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 696416022c9SBarry Smith } 697416022c9SBarry Smith return 0; 698416022c9SBarry Smith } 699416022c9SBarry Smith 7001eb62cbbSBarry Smith /* 7011eb62cbbSBarry Smith This has to provide several versions. 7021eb62cbbSBarry Smith 7031eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7041eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 705d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7061eb62cbbSBarry Smith */ 707fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 708dbb450caSBarry Smith double fshift,int its,Vec xx) 7098a729477SBarry Smith { 71044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 711d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 712ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 713c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7146abc6512SBarry Smith int ierr,*idx, *diag; 715416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7168a729477SBarry Smith 717d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 718dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 719dbb450caSBarry Smith ls = ls + shift; 720ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 721d6dfbf8fSBarry Smith diag = A->diag; 722c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 723da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 72438597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 725da3a660dSBarry Smith } 72664119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72778b31e54SBarry Smith CHKERRQ(ierr); 72864119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 72978b31e54SBarry Smith CHKERRQ(ierr); 730d6dfbf8fSBarry Smith while (its--) { 731d6dfbf8fSBarry Smith /* go down through the rows */ 732d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 733d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 734dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 735dbb450caSBarry Smith v = A->a + A->i[i] + shift; 736d6dfbf8fSBarry Smith sum = b[i]; 737d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 738dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 739d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 740dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 741dbb450caSBarry Smith v = B->a + B->i[i] + shift; 742d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 743d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 744d6dfbf8fSBarry Smith } 745d6dfbf8fSBarry Smith /* come up through the rows */ 746d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 747d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 748dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 749dbb450caSBarry Smith v = A->a + A->i[i] + shift; 750d6dfbf8fSBarry Smith sum = b[i]; 751d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 752dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 753d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 754dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 755dbb450caSBarry Smith v = B->a + B->i[i] + shift; 756d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 757d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 758d6dfbf8fSBarry Smith } 759d6dfbf8fSBarry Smith } 760d6dfbf8fSBarry Smith } 761d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 762da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 76338597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 764da3a660dSBarry Smith } 76564119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76678b31e54SBarry Smith CHKERRQ(ierr); 76764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 76878b31e54SBarry Smith CHKERRQ(ierr); 769d6dfbf8fSBarry Smith while (its--) { 770d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 771d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 772dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 773dbb450caSBarry Smith v = A->a + A->i[i] + shift; 774d6dfbf8fSBarry Smith sum = b[i]; 775d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 776dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 777d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 778dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 779dbb450caSBarry Smith v = B->a + B->i[i] + shift; 780d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 781d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 782d6dfbf8fSBarry Smith } 783d6dfbf8fSBarry Smith } 784d6dfbf8fSBarry Smith } 785d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 786da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 78738597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 788da3a660dSBarry Smith } 78964119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 79078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 79164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 79278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 793d6dfbf8fSBarry Smith while (its--) { 794d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 795d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 796dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 797dbb450caSBarry Smith v = A->a + A->i[i] + shift; 798d6dfbf8fSBarry Smith sum = b[i]; 799d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 800dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 801d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 802dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 803dbb450caSBarry Smith v = B->a + B->i[i] + shift; 804d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 805d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 806d6dfbf8fSBarry Smith } 807d6dfbf8fSBarry Smith } 808d6dfbf8fSBarry Smith } 809c16cb8f2SBarry Smith else { 810c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 811c16cb8f2SBarry Smith } 8128a729477SBarry Smith return 0; 8138a729477SBarry Smith } 814a66be287SLois Curfman McInnes 815fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 816fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 817a66be287SLois Curfman McInnes { 818a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 819a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 820a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 821a66be287SLois Curfman McInnes 82278b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 82378b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 824a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 825a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 826bcd2baecSBarry Smith if (nz) *nz = isend[0]; 827bcd2baecSBarry Smith if (nzalloc) *nzalloc = isend[1]; 828bcd2baecSBarry Smith if (mem) *mem = isend[2]; 829a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 830d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 831bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 832bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 833bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 834a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 835d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 836bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 837bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 838bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 839a66be287SLois Curfman McInnes } 840a66be287SLois Curfman McInnes return 0; 841a66be287SLois Curfman McInnes } 842a66be287SLois Curfman McInnes 843299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 844299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 845299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 846299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 847299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 848299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 849299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 850299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 851299609e3SLois Curfman McInnes 852416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 853c74985f6SBarry Smith { 854c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 855c74985f6SBarry Smith 8566d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 8576d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 8586d4a8577SBarry Smith op == MAT_COLUMNS_SORTED || 8596d4a8577SBarry Smith op == MAT_ROW_ORIENTED) { 860c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 861c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 862c74985f6SBarry Smith } 8636d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 8646d4a8577SBarry Smith op == MAT_SYMMETRIC || 8656d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 8666d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 86794a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 8686d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 8694b0e389bSBarry Smith a->roworiented = 0; 8704b0e389bSBarry Smith MatSetOption(a->A,op); 8714b0e389bSBarry Smith MatSetOption(a->B,op); 8724b0e389bSBarry Smith } 8736d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 8746d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 875c0bbcb79SLois Curfman McInnes else 8764d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 877c74985f6SBarry Smith return 0; 878c74985f6SBarry Smith } 879c74985f6SBarry Smith 880d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 881c74985f6SBarry Smith { 88244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 883c74985f6SBarry Smith *m = mat->M; *n = mat->N; 884c74985f6SBarry Smith return 0; 885c74985f6SBarry Smith } 886c74985f6SBarry Smith 887d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 888c74985f6SBarry Smith { 88944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 890b7c46309SBarry Smith *m = mat->m; *n = mat->N; 891c74985f6SBarry Smith return 0; 892c74985f6SBarry Smith } 893c74985f6SBarry Smith 894d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 895c74985f6SBarry Smith { 89644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 897c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 898c74985f6SBarry Smith return 0; 899c74985f6SBarry Smith } 900c74985f6SBarry Smith 9016d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9026d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9036d84be18SBarry Smith 9046d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 90539e00950SLois Curfman McInnes { 906154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 90770f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 908154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 909154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 91070f0671dSBarry Smith int *cmap, *idx_p; 91139e00950SLois Curfman McInnes 9127a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 9137a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 9147a0afa10SBarry Smith 91570f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 9167a0afa10SBarry Smith /* 9177a0afa10SBarry Smith allocate enough space to hold information from the longest row. 9187a0afa10SBarry Smith */ 9197a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 920c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 921c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 9227a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 9237a0afa10SBarry Smith if (max < tmp) { max = tmp; } 9247a0afa10SBarry Smith } 9257a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 9267a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 9277a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 9287a0afa10SBarry Smith } 9297a0afa10SBarry Smith 930b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 931abc0e9e4SLois Curfman McInnes lrow = row - rstart; 93239e00950SLois Curfman McInnes 933154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 934154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 935154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 93638597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 93738597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 938154123eaSLois Curfman McInnes nztot = nzA + nzB; 939154123eaSLois Curfman McInnes 94070f0671dSBarry Smith cmap = mat->garray; 941154123eaSLois Curfman McInnes if (v || idx) { 942154123eaSLois Curfman McInnes if (nztot) { 943154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 94470f0671dSBarry Smith int imark = -1; 945154123eaSLois Curfman McInnes if (v) { 94670f0671dSBarry Smith *v = v_p = mat->rowvalues; 94739e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 94870f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 949154123eaSLois Curfman McInnes else break; 950154123eaSLois Curfman McInnes } 951154123eaSLois Curfman McInnes imark = i; 95270f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 95370f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 954154123eaSLois Curfman McInnes } 955154123eaSLois Curfman McInnes if (idx) { 95670f0671dSBarry Smith *idx = idx_p = mat->rowindices; 95770f0671dSBarry Smith if (imark > -1) { 95870f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 95970f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 96070f0671dSBarry Smith } 96170f0671dSBarry Smith } else { 962154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 96370f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 964154123eaSLois Curfman McInnes else break; 965154123eaSLois Curfman McInnes } 966154123eaSLois Curfman McInnes imark = i; 96770f0671dSBarry Smith } 96870f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 96970f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 97039e00950SLois Curfman McInnes } 97139e00950SLois Curfman McInnes } 9721ca473b0SSatish Balay else { 9731ca473b0SSatish Balay if (idx) *idx = 0; 9741ca473b0SSatish Balay if (v) *v = 0; 9751ca473b0SSatish Balay } 976154123eaSLois Curfman McInnes } 97739e00950SLois Curfman McInnes *nz = nztot; 97838597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 97938597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 98039e00950SLois Curfman McInnes return 0; 98139e00950SLois Curfman McInnes } 98239e00950SLois Curfman McInnes 9836d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 98439e00950SLois Curfman McInnes { 9857a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 9867a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 9877a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 9887a0afa10SBarry Smith } 9897a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 99039e00950SLois Curfman McInnes return 0; 99139e00950SLois Curfman McInnes } 99239e00950SLois Curfman McInnes 993cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 994855ac2c5SLois Curfman McInnes { 995855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 996ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 997416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 998416022c9SBarry Smith double sum = 0.0; 99904ca555eSLois Curfman McInnes Scalar *v; 100004ca555eSLois Curfman McInnes 100117699dbbSLois Curfman McInnes if (aij->size == 1) { 100214183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 100337fa93a5SLois Curfman McInnes } else { 100404ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 100504ca555eSLois Curfman McInnes v = amat->a; 100604ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 100704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 100804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 100904ca555eSLois Curfman McInnes #else 101004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 101104ca555eSLois Curfman McInnes #endif 101204ca555eSLois Curfman McInnes } 101304ca555eSLois Curfman McInnes v = bmat->a; 101404ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 101504ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 101604ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 101704ca555eSLois Curfman McInnes #else 101804ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 101904ca555eSLois Curfman McInnes #endif 102004ca555eSLois Curfman McInnes } 10216d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 102204ca555eSLois Curfman McInnes *norm = sqrt(*norm); 102304ca555eSLois Curfman McInnes } 102404ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 102504ca555eSLois Curfman McInnes double *tmp, *tmp2; 102604ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 10270452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 10280452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1029cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 103004ca555eSLois Curfman McInnes *norm = 0.0; 103104ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 103204ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1033579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 103404ca555eSLois Curfman McInnes } 103504ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 103604ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1037579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 103804ca555eSLois Curfman McInnes } 10396d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 104004ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 104104ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 104204ca555eSLois Curfman McInnes } 10430452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 104404ca555eSLois Curfman McInnes } 104504ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1046515d9167SLois Curfman McInnes double ntemp = 0.0; 104704ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1048dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 104904ca555eSLois Curfman McInnes sum = 0.0; 105004ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1051cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 105204ca555eSLois Curfman McInnes } 1053dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 105404ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1055cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 105604ca555eSLois Curfman McInnes } 1057515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 105804ca555eSLois Curfman McInnes } 10596d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 106004ca555eSLois Curfman McInnes } 106104ca555eSLois Curfman McInnes else { 1062515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 106304ca555eSLois Curfman McInnes } 106437fa93a5SLois Curfman McInnes } 1065855ac2c5SLois Curfman McInnes return 0; 1066855ac2c5SLois Curfman McInnes } 1067855ac2c5SLois Curfman McInnes 10680de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1069b7c46309SBarry Smith { 1070b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1071dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1072416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1073416022c9SBarry Smith Mat B; 1074b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1075b7c46309SBarry Smith Scalar *array; 1076b7c46309SBarry Smith 10773638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 10783638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1079b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1080b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1081b7c46309SBarry Smith 1082b7c46309SBarry Smith /* copy over the A part */ 1083ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1084b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1085b7c46309SBarry Smith row = a->rstart; 1086dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1087b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1088416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1089b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1090b7c46309SBarry Smith } 1091b7c46309SBarry Smith aj = Aloc->j; 10924af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1093b7c46309SBarry Smith 1094b7c46309SBarry Smith /* copy over the B part */ 1095ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1096b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1097b7c46309SBarry Smith row = a->rstart; 10980452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1099dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1100b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1101416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1102b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1103b7c46309SBarry Smith } 11040452661fSBarry Smith PetscFree(ct); 11056d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11066d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11073638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 11080de55854SLois Curfman McInnes *matout = B; 11090de55854SLois Curfman McInnes } else { 11100de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 11110452661fSBarry Smith PetscFree(a->rowners); 11120de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 11130de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 11140452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 11150452661fSBarry Smith if (a->garray) PetscFree(a->garray); 11160de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1117a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 11180452661fSBarry Smith PetscFree(a); 1119416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 11200452661fSBarry Smith PetscHeaderDestroy(B); 11210de55854SLois Curfman McInnes } 1122b7c46309SBarry Smith return 0; 1123b7c46309SBarry Smith } 1124b7c46309SBarry Smith 1125682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1126682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1127682d7d0cSBarry Smith { 1128682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1129682d7d0cSBarry Smith 1130682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1131682d7d0cSBarry Smith else return 0; 1132682d7d0cSBarry Smith } 1133682d7d0cSBarry Smith 11345a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 11355a838052SSatish Balay { 11365a838052SSatish Balay *bs = 1; 11375a838052SSatish Balay return 0; 11385a838052SSatish Balay } 11395a838052SSatish Balay 1140fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 11413d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 11422f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1143598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 11448a729477SBarry Smith /* -------------------------------------------------------------------*/ 11452ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 114639e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 114744a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 114844a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1149299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1150299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1151299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 115244a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1153b7c46309SBarry Smith MatTranspose_MPIAIJ, 1154a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1155855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1156ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11571eb62cbbSBarry Smith 0, 1158299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1159299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1160299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1161d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1162299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 11633e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1164b49de8d1SLois Curfman McInnes 0,0,0, 1165598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1166052efed2SBarry Smith MatPrintHelp_MPIAIJ, 11675a838052SSatish Balay MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ}; 11688a729477SBarry Smith 11691987afe7SBarry Smith /*@C 1170ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 11713a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 11723a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 11733a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 11743a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 11758a729477SBarry Smith 11768a729477SBarry Smith Input Parameters: 11771eb62cbbSBarry Smith . comm - MPI communicator 11787d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 117992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118092e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 118192e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 118292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118392e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 11847d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 118592e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1186ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1187ff756334SLois Curfman McInnes (same for all local rows) 11882bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 118992e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 11902bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 11912bd5e0b2SLois Curfman McInnes it is zero. 11922bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1193ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 11942bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 11952bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 11962bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 11978a729477SBarry Smith 1198ff756334SLois Curfman McInnes Output Parameter: 119944cd7ae7SLois Curfman McInnes . A - the matrix 12008a729477SBarry Smith 1201ff756334SLois Curfman McInnes Notes: 1202ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1203ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12040002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12050002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1206ff756334SLois Curfman McInnes 1207ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1208ff756334SLois Curfman McInnes (possibly both). 1209ff756334SLois Curfman McInnes 12105511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 12115511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 12125511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 12135511cfe3SLois Curfman McInnes 12145511cfe3SLois Curfman McInnes Options Database Keys: 12155511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 12166e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 12176e62573dSLois Curfman McInnes $ (max limit=5) 12186e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 12196e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 12206e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 12215511cfe3SLois Curfman McInnes 1222e0245417SLois Curfman McInnes Storage Information: 1223e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1224e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1225e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1226e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1227e0245417SLois Curfman McInnes 1228e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 12295ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 12305ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 12315ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 12325ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1233ff756334SLois Curfman McInnes 12345511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 12355511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 12362191d07cSBarry Smith 1237b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1238b810aeb4SBarry Smith $ ------------------- 12395511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 12405511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 12415511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 12425511cfe3SLois Curfman McInnes $ ------------------- 1243b810aeb4SBarry Smith $ 1244b810aeb4SBarry Smith 12455511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 12465511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 12475511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 12485511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 12495511cfe3SLois Curfman McInnes 12505511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 12515511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 12525511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 12533d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 125492e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 12553d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 12563a511b96SLois Curfman McInnes 1257dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1258ff756334SLois Curfman McInnes 1259fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 12608a729477SBarry Smith @*/ 12611eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 126244cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 12638a729477SBarry Smith { 126444cd7ae7SLois Curfman McInnes Mat B; 126544cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 12666abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1267416022c9SBarry Smith 126844cd7ae7SLois Curfman McInnes *A = 0; 126944cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 127044cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 127144cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 127244cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 127344cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 127444cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 127544cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 127644cd7ae7SLois Curfman McInnes B->factor = 0; 127744cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 1278d6dfbf8fSBarry Smith 127944cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 128044cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 128144cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 12821eb62cbbSBarry Smith 1283b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 12842e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 12851987afe7SBarry Smith 1286dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 12871eb62cbbSBarry Smith work[0] = m; work[1] = n; 1288d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1289dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1290dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 12911eb62cbbSBarry Smith } 129244cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 129344cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 129444cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 129544cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 129644cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 129744cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 12981eb62cbbSBarry Smith 12991eb62cbbSBarry Smith /* build local table of row and column ownerships */ 130044cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 130144cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1302603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 130344cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 130444cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 130544cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 130644cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 13078a729477SBarry Smith } 130844cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 130944cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 131044cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 131144cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 131244cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 131344cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 13148a729477SBarry Smith } 131544cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 131644cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 13178a729477SBarry Smith 13185ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 131944cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 132044cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 13217b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 132244cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 132344cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 13248a729477SBarry Smith 13251eb62cbbSBarry Smith /* build cache for off array entries formed */ 132644cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 132744cd7ae7SLois Curfman McInnes b->colmap = 0; 132844cd7ae7SLois Curfman McInnes b->garray = 0; 132944cd7ae7SLois Curfman McInnes b->roworiented = 1; 13308a729477SBarry Smith 13311eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 133244cd7ae7SLois Curfman McInnes b->lvec = 0; 133344cd7ae7SLois Curfman McInnes b->Mvctx = 0; 13348a729477SBarry Smith 13357a0afa10SBarry Smith /* stuff for MatGetRow() */ 133644cd7ae7SLois Curfman McInnes b->rowindices = 0; 133744cd7ae7SLois Curfman McInnes b->rowvalues = 0; 133844cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 13397a0afa10SBarry Smith 134044cd7ae7SLois Curfman McInnes *A = B; 1341d6dfbf8fSBarry Smith return 0; 1342d6dfbf8fSBarry Smith } 1343c74985f6SBarry Smith 13443d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1345d6dfbf8fSBarry Smith { 1346d6dfbf8fSBarry Smith Mat mat; 1347416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1348a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1349d6dfbf8fSBarry Smith 1350416022c9SBarry Smith *newmat = 0; 13510452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1352a5a9c739SBarry Smith PLogObjectCreate(mat); 13530452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1354416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 135544a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 135644a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1357d6dfbf8fSBarry Smith mat->factor = matin->factor; 1358c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1359d6dfbf8fSBarry Smith 136044cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 136144cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 136244cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 136344cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1364d6dfbf8fSBarry Smith 1365416022c9SBarry Smith a->rstart = oldmat->rstart; 1366416022c9SBarry Smith a->rend = oldmat->rend; 1367416022c9SBarry Smith a->cstart = oldmat->cstart; 1368416022c9SBarry Smith a->cend = oldmat->cend; 136917699dbbSLois Curfman McInnes a->size = oldmat->size; 137017699dbbSLois Curfman McInnes a->rank = oldmat->rank; 137164119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1372bcd2baecSBarry Smith a->rowvalues = 0; 1373bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1374d6dfbf8fSBarry Smith 1375603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1376603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1377603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1378603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1379416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 13802ee70a88SLois Curfman McInnes if (oldmat->colmap) { 13810452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1382416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1383416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1384416022c9SBarry Smith } else a->colmap = 0; 1385ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 13860452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1387464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1388416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1389416022c9SBarry Smith } else a->garray = 0; 1390d6dfbf8fSBarry Smith 1391416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1392416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1393a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1394416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1395416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1396416022c9SBarry Smith PLogObjectParent(mat,a->A); 1397416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1398416022c9SBarry Smith PLogObjectParent(mat,a->B); 13995dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 140025cdf11fSBarry Smith if (flg) { 1401682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1402682d7d0cSBarry Smith } 14038a729477SBarry Smith *newmat = mat; 14048a729477SBarry Smith return 0; 14058a729477SBarry Smith } 1406416022c9SBarry Smith 140777c4ece6SBarry Smith #include "sys.h" 1408416022c9SBarry Smith 140919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1410416022c9SBarry Smith { 1411d65a2f8fSBarry Smith Mat A; 1412416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1413d65a2f8fSBarry Smith Scalar *vals,*svals; 141419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1415416022c9SBarry Smith MPI_Status status; 141617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1417d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 141819bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1419416022c9SBarry Smith 142017699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 142117699dbbSLois Curfman McInnes if (!rank) { 142290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 142377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1424c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1425416022c9SBarry Smith } 1426416022c9SBarry Smith 1427416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1428416022c9SBarry Smith M = header[1]; N = header[2]; 1429416022c9SBarry Smith /* determine ownership of all rows */ 143017699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 14310452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1432416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1433416022c9SBarry Smith rowners[0] = 0; 143417699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1435416022c9SBarry Smith rowners[i] += rowners[i-1]; 1436416022c9SBarry Smith } 143717699dbbSLois Curfman McInnes rstart = rowners[rank]; 143817699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1439416022c9SBarry Smith 1440416022c9SBarry Smith /* distribute row lengths to all processors */ 14410452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1442416022c9SBarry Smith offlens = ourlens + (rend-rstart); 144317699dbbSLois Curfman McInnes if (!rank) { 14440452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 144577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 14460452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 144717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1448d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 14490452661fSBarry Smith PetscFree(sndcounts); 1450416022c9SBarry Smith } 1451416022c9SBarry Smith else { 1452416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1453416022c9SBarry Smith } 1454416022c9SBarry Smith 145517699dbbSLois Curfman McInnes if (!rank) { 1456416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 14570452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1458cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 145917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1460416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1461416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1462416022c9SBarry Smith } 1463416022c9SBarry Smith } 14640452661fSBarry Smith PetscFree(rowlengths); 1465416022c9SBarry Smith 1466416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1467416022c9SBarry Smith maxnz = 0; 146817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 14690452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1470416022c9SBarry Smith } 14710452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1472416022c9SBarry Smith 1473416022c9SBarry Smith /* read in my part of the matrix column indices */ 1474416022c9SBarry Smith nz = procsnz[0]; 14750452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 147677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1477d65a2f8fSBarry Smith 1478d65a2f8fSBarry Smith /* read in every one elses and ship off */ 147917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1480d65a2f8fSBarry Smith nz = procsnz[i]; 148177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1482d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1483d65a2f8fSBarry Smith } 14840452661fSBarry Smith PetscFree(cols); 1485416022c9SBarry Smith } 1486416022c9SBarry Smith else { 1487416022c9SBarry Smith /* determine buffer space needed for message */ 1488416022c9SBarry Smith nz = 0; 1489416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1490416022c9SBarry Smith nz += ourlens[i]; 1491416022c9SBarry Smith } 14920452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1493416022c9SBarry Smith 1494416022c9SBarry Smith /* receive message of column indices*/ 1495d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1496416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1497c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1498416022c9SBarry Smith } 1499416022c9SBarry Smith 1500416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1501cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1502416022c9SBarry Smith jj = 0; 1503416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1504416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1505d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1506416022c9SBarry Smith jj++; 1507416022c9SBarry Smith } 1508416022c9SBarry Smith } 1509d65a2f8fSBarry Smith 1510d65a2f8fSBarry Smith /* create our matrix */ 1511416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1512416022c9SBarry Smith ourlens[i] -= offlens[i]; 1513416022c9SBarry Smith } 1514d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1515d65a2f8fSBarry Smith A = *newmat; 15166d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1517d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1518d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1519d65a2f8fSBarry Smith } 1520416022c9SBarry Smith 152117699dbbSLois Curfman McInnes if (!rank) { 15220452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1523416022c9SBarry Smith 1524416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1525416022c9SBarry Smith nz = procsnz[0]; 152677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1527d65a2f8fSBarry Smith 1528d65a2f8fSBarry Smith /* insert into matrix */ 1529d65a2f8fSBarry Smith jj = rstart; 1530d65a2f8fSBarry Smith smycols = mycols; 1531d65a2f8fSBarry Smith svals = vals; 1532d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1533d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1534d65a2f8fSBarry Smith smycols += ourlens[i]; 1535d65a2f8fSBarry Smith svals += ourlens[i]; 1536d65a2f8fSBarry Smith jj++; 1537416022c9SBarry Smith } 1538416022c9SBarry Smith 1539d65a2f8fSBarry Smith /* read in other processors and ship out */ 154017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1541416022c9SBarry Smith nz = procsnz[i]; 154277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1543d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1544416022c9SBarry Smith } 15450452661fSBarry Smith PetscFree(procsnz); 1546416022c9SBarry Smith } 1547d65a2f8fSBarry Smith else { 1548d65a2f8fSBarry Smith /* receive numeric values */ 15490452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1550416022c9SBarry Smith 1551d65a2f8fSBarry Smith /* receive message of values*/ 1552d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1553d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1554c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1555d65a2f8fSBarry Smith 1556d65a2f8fSBarry Smith /* insert into matrix */ 1557d65a2f8fSBarry Smith jj = rstart; 1558d65a2f8fSBarry Smith smycols = mycols; 1559d65a2f8fSBarry Smith svals = vals; 1560d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1561d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1562d65a2f8fSBarry Smith smycols += ourlens[i]; 1563d65a2f8fSBarry Smith svals += ourlens[i]; 1564d65a2f8fSBarry Smith jj++; 1565d65a2f8fSBarry Smith } 1566d65a2f8fSBarry Smith } 15670452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1568d65a2f8fSBarry Smith 15696d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15706d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1571416022c9SBarry Smith return 0; 1572416022c9SBarry Smith } 1573