1cb512458SBarry Smith #ifndef lint 2*c351683dSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.172 1996/11/01 23:25:33 balay Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 140a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18905e6a2fSBarry Smith int n = B->n,i; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 293b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 303b2fbd54SBarry Smith PetscTruth *done) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 3417699dbbSLois Curfman McInnes if (aij->size == 1) { 353b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 363b2fbd54SBarry Smith } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel"); 373b2fbd54SBarry Smith return 0; 383b2fbd54SBarry Smith } 393b2fbd54SBarry Smith 403b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 413b2fbd54SBarry Smith PetscTruth *done) 423b2fbd54SBarry Smith { 433b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 443b2fbd54SBarry Smith int ierr; 453b2fbd54SBarry Smith if (aij->size == 1) { 463b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 473b2fbd54SBarry Smith } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel"); 48299609e3SLois Curfman McInnes return 0; 49299609e3SLois Curfman McInnes } 50299609e3SLois Curfman McInnes 510a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 520a198c4cSBarry Smith 534b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 548a729477SBarry Smith { 5544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 564b0e389bSBarry Smith Scalar value; 571eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 581eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 59905e6a2fSBarry Smith int roworiented = aij->roworiented; 608a729477SBarry Smith 610a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 6264119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 6348d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 648a729477SBarry Smith } 650a198c4cSBarry Smith #endif 661eb62cbbSBarry Smith aij->insertmode = addv; 678a729477SBarry Smith for ( i=0; i<m; i++ ) { 680a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 694b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 704b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 710a198c4cSBarry Smith #endif 724b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 734b0e389bSBarry Smith row = im[i] - rstart; 741eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 754b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 764b0e389bSBarry Smith col = in[j] - cstart; 774b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 780a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 791eb62cbbSBarry Smith } 800a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 810a198c4cSBarry Smith else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");} 820a198c4cSBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");} 830a198c4cSBarry Smith #endif 841eb62cbbSBarry Smith else { 85227d817aSBarry Smith if (mat->was_assembled) { 86905e6a2fSBarry Smith if (!aij->colmap) { 87905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 88905e6a2fSBarry Smith } 89905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 90ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 912493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 924b0e389bSBarry Smith col = in[j]; 93d6dfbf8fSBarry Smith } 949e25ed09SBarry Smith } 954b0e389bSBarry Smith else col = in[j]; 964b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 970a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 981eb62cbbSBarry Smith } 991eb62cbbSBarry Smith } 1001eb62cbbSBarry Smith } 1011eb62cbbSBarry Smith else { 1024b0e389bSBarry Smith if (roworiented) { 1034b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 1044b0e389bSBarry Smith } 1054b0e389bSBarry Smith else { 1064b0e389bSBarry Smith row = im[i]; 1074b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 1084b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 1094b0e389bSBarry Smith } 1104b0e389bSBarry Smith } 1111eb62cbbSBarry Smith } 1128a729477SBarry Smith } 1138a729477SBarry Smith return 0; 1148a729477SBarry Smith } 1158a729477SBarry Smith 116b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 117b49de8d1SLois Curfman McInnes { 118b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 119b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 120b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 121b49de8d1SLois Curfman McInnes 122b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 123b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 124b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 125b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 126b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 127b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 128b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 129b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 130b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 131b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 132b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 133b49de8d1SLois Curfman McInnes } 134b49de8d1SLois Curfman McInnes else { 135905e6a2fSBarry Smith if (!aij->colmap) { 136905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 137905e6a2fSBarry Smith } 138905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 139e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 140d9d09a02SSatish Balay else { 141b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 142b49de8d1SLois Curfman McInnes } 143b49de8d1SLois Curfman McInnes } 144b49de8d1SLois Curfman McInnes } 145d9d09a02SSatish Balay } 146b49de8d1SLois Curfman McInnes else { 147b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 148b49de8d1SLois Curfman McInnes } 149b49de8d1SLois Curfman McInnes } 150b49de8d1SLois Curfman McInnes return 0; 151b49de8d1SLois Curfman McInnes } 152b49de8d1SLois Curfman McInnes 153fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1548a729477SBarry Smith { 15544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 156d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 15717699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 15817699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1591eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1606abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1611eb62cbbSBarry Smith InsertMode addv; 1621eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1631eb62cbbSBarry Smith 1641eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 165d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 166dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 167bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1681eb62cbbSBarry Smith } 1691eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1701eb62cbbSBarry Smith 1711eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1720452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 173cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1740452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1751eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1761eb62cbbSBarry Smith idx = aij->stash.idx[i]; 17717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1781eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1791eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1808a729477SBarry Smith } 1818a729477SBarry Smith } 1828a729477SBarry Smith } 18317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1841eb62cbbSBarry Smith 1851eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1860452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 18717699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 18817699dbbSLois Curfman McInnes nreceives = work[rank]; 18917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 19017699dbbSLois Curfman McInnes nmax = work[rank]; 1910452661fSBarry Smith PetscFree(work); 1921eb62cbbSBarry Smith 1931eb62cbbSBarry Smith /* post receives: 1941eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1951eb62cbbSBarry Smith (global index,value) we store the global index as a double 196d6dfbf8fSBarry Smith to simplify the message passing. 1971eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1981eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1991eb62cbbSBarry Smith this is a lot of wasted space. 2001eb62cbbSBarry Smith 2011eb62cbbSBarry Smith 2021eb62cbbSBarry Smith This could be done better. 2031eb62cbbSBarry Smith */ 2040452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 20578b31e54SBarry Smith CHKPTRQ(rvalues); 2060452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 20778b31e54SBarry Smith CHKPTRQ(recv_waits); 2081eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 209416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 2101eb62cbbSBarry Smith comm,recv_waits+i); 2111eb62cbbSBarry Smith } 2121eb62cbbSBarry Smith 2131eb62cbbSBarry Smith /* do sends: 2141eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 2151eb62cbbSBarry Smith the ith processor 2161eb62cbbSBarry Smith */ 2170452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 2180452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 21978b31e54SBarry Smith CHKPTRQ(send_waits); 2200452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2211eb62cbbSBarry Smith starts[0] = 0; 22217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2231eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2241eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2251eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2261eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2271eb62cbbSBarry Smith } 2280452661fSBarry Smith PetscFree(owner); 2291eb62cbbSBarry Smith starts[0] = 0; 23017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2311eb62cbbSBarry Smith count = 0; 23217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2331eb62cbbSBarry Smith if (procs[i]) { 234416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2351eb62cbbSBarry Smith comm,send_waits+count++); 2361eb62cbbSBarry Smith } 2371eb62cbbSBarry Smith } 2380452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2391eb62cbbSBarry Smith 2401eb62cbbSBarry Smith /* Free cache space */ 2412191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 24278b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2431eb62cbbSBarry Smith 2441eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2451eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2461eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2471eb62cbbSBarry Smith aij->rmax = nmax; 2481eb62cbbSBarry Smith 2491eb62cbbSBarry Smith return 0; 2501eb62cbbSBarry Smith } 25144a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2521eb62cbbSBarry Smith 253fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2541eb62cbbSBarry Smith { 25544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2561eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 257416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 258905e6a2fSBarry Smith int row,col,other_disassembled; 2591eb62cbbSBarry Smith Scalar *values,val; 2601eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2611eb62cbbSBarry Smith 2621eb62cbbSBarry Smith /* wait on receives */ 2631eb62cbbSBarry Smith while (count) { 264d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2651eb62cbbSBarry Smith /* unpack receives into our local space */ 266d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 267c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2681eb62cbbSBarry Smith n = n/3; 2691eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 270227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 271227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2721eb62cbbSBarry Smith val = values[3*i+2]; 2731eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2741eb62cbbSBarry Smith col -= aij->cstart; 2751eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2761eb62cbbSBarry Smith } 2771eb62cbbSBarry Smith else { 27855a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 279905e6a2fSBarry Smith if (!aij->colmap) { 280905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 281905e6a2fSBarry Smith } 282905e6a2fSBarry Smith col = aij->colmap[col] - 1; 283ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2842493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 285227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 286d6dfbf8fSBarry Smith } 2879e25ed09SBarry Smith } 2881eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2891eb62cbbSBarry Smith } 2901eb62cbbSBarry Smith } 2911eb62cbbSBarry Smith count--; 2921eb62cbbSBarry Smith } 2930452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2941eb62cbbSBarry Smith 2951eb62cbbSBarry Smith /* wait on sends */ 2961eb62cbbSBarry Smith if (aij->nsends) { 2970a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 2981eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2990452661fSBarry Smith PetscFree(send_status); 3001eb62cbbSBarry Smith } 3010452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 3021eb62cbbSBarry Smith 30364119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 30478b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 30578b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 3061eb62cbbSBarry Smith 3072493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 3082493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 309227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 310227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 3112493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 3122493cbb0SBarry Smith } 3132493cbb0SBarry Smith 3146d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 31578b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 3165e42470aSBarry Smith } 31778b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 31878b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3195e42470aSBarry Smith 3207a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3218a729477SBarry Smith return 0; 3228a729477SBarry Smith } 3238a729477SBarry Smith 3242ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3251eb62cbbSBarry Smith { 32644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 327dbd7a890SLois Curfman McInnes int ierr; 32878b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 32978b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3301eb62cbbSBarry Smith return 0; 3311eb62cbbSBarry Smith } 3321eb62cbbSBarry Smith 3331eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3341eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3351eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3361eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3371eb62cbbSBarry Smith routine. 3381eb62cbbSBarry Smith */ 33944a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3401eb62cbbSBarry Smith { 34144a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 34217699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3436abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 34417699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3455392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 346d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 347d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3481eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3491eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3501eb62cbbSBarry Smith IS istmp; 3511eb62cbbSBarry Smith 35277c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 35378b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3541eb62cbbSBarry Smith 3551eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3560452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 357cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3580452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3591eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3601eb62cbbSBarry Smith idx = rows[i]; 3611eb62cbbSBarry Smith found = 0; 36217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3631eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3641eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3651eb62cbbSBarry Smith } 3661eb62cbbSBarry Smith } 367bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3681eb62cbbSBarry Smith } 36917699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3701eb62cbbSBarry Smith 3711eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3720452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 37317699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 37417699dbbSLois Curfman McInnes nrecvs = work[rank]; 37517699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 37617699dbbSLois Curfman McInnes nmax = work[rank]; 3770452661fSBarry Smith PetscFree(work); 3781eb62cbbSBarry Smith 3791eb62cbbSBarry Smith /* post receives: */ 3800452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 38178b31e54SBarry Smith CHKPTRQ(rvalues); 3820452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 38378b31e54SBarry Smith CHKPTRQ(recv_waits); 3841eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 385416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3861eb62cbbSBarry Smith } 3871eb62cbbSBarry Smith 3881eb62cbbSBarry Smith /* do sends: 3891eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3901eb62cbbSBarry Smith the ith processor 3911eb62cbbSBarry Smith */ 3920452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 39478b31e54SBarry Smith CHKPTRQ(send_waits); 3950452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3961eb62cbbSBarry Smith starts[0] = 0; 39717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3981eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3991eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4001eb62cbbSBarry Smith } 4011eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 4021eb62cbbSBarry Smith 4031eb62cbbSBarry Smith starts[0] = 0; 40417699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4051eb62cbbSBarry Smith count = 0; 40617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4071eb62cbbSBarry Smith if (procs[i]) { 408416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 4091eb62cbbSBarry Smith } 4101eb62cbbSBarry Smith } 4110452661fSBarry Smith PetscFree(starts); 4121eb62cbbSBarry Smith 41317699dbbSLois Curfman McInnes base = owners[rank]; 4141eb62cbbSBarry Smith 4151eb62cbbSBarry Smith /* wait on receives */ 4160452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 4171eb62cbbSBarry Smith source = lens + nrecvs; 4181eb62cbbSBarry Smith count = nrecvs; slen = 0; 4191eb62cbbSBarry Smith while (count) { 420d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4211eb62cbbSBarry Smith /* unpack receives into our local space */ 4221eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 423d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 424d6dfbf8fSBarry Smith lens[imdex] = n; 4251eb62cbbSBarry Smith slen += n; 4261eb62cbbSBarry Smith count--; 4271eb62cbbSBarry Smith } 4280452661fSBarry Smith PetscFree(recv_waits); 4291eb62cbbSBarry Smith 4301eb62cbbSBarry Smith /* move the data into the send scatter */ 4310452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4321eb62cbbSBarry Smith count = 0; 4331eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4341eb62cbbSBarry Smith values = rvalues + i*nmax; 4351eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4361eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4371eb62cbbSBarry Smith } 4381eb62cbbSBarry Smith } 4390452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4400452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4411eb62cbbSBarry Smith 4421eb62cbbSBarry Smith /* actually zap the local rows */ 443537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 444464493b3SBarry Smith PLogObjectParent(A,istmp); 4450452661fSBarry Smith PetscFree(lrows); 44678b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 44778b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 44878b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4491eb62cbbSBarry Smith 4501eb62cbbSBarry Smith /* wait on sends */ 4511eb62cbbSBarry Smith if (nsends) { 4520452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 45378b31e54SBarry Smith CHKPTRQ(send_status); 4541eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4550452661fSBarry Smith PetscFree(send_status); 4561eb62cbbSBarry Smith } 4570452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4581eb62cbbSBarry Smith 4591eb62cbbSBarry Smith return 0; 4601eb62cbbSBarry Smith } 4611eb62cbbSBarry Smith 462416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4631eb62cbbSBarry Smith { 464416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 465fbd6ef76SBarry Smith int ierr,nt; 466416022c9SBarry Smith 467a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 468fbd6ef76SBarry Smith if (nt != a->n) { 46947b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 470fbd6ef76SBarry Smith } 47164119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 47238597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 47364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 47438597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4751eb62cbbSBarry Smith return 0; 4761eb62cbbSBarry Smith } 4771eb62cbbSBarry Smith 478416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 479da3a660dSBarry Smith { 480416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 481da3a660dSBarry Smith int ierr; 48264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 48338597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 48464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 48538597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 486da3a660dSBarry Smith return 0; 487da3a660dSBarry Smith } 488da3a660dSBarry Smith 489416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 490da3a660dSBarry Smith { 491416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 492da3a660dSBarry Smith int ierr; 493da3a660dSBarry Smith 494da3a660dSBarry Smith /* do nondiagonal part */ 49538597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 496da3a660dSBarry Smith /* send it on its way */ 497537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 498da3a660dSBarry Smith /* do local part */ 49938597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 500da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 501da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 502da3a660dSBarry Smith /* but is not perhaps always true. */ 503537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 504da3a660dSBarry Smith return 0; 505da3a660dSBarry Smith } 506da3a660dSBarry Smith 507416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 508da3a660dSBarry Smith { 509416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 510da3a660dSBarry Smith int ierr; 511da3a660dSBarry Smith 512da3a660dSBarry Smith /* do nondiagonal part */ 51338597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 514da3a660dSBarry Smith /* send it on its way */ 515537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 516da3a660dSBarry Smith /* do local part */ 51738597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 518da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 519da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 520da3a660dSBarry Smith /* but is not perhaps always true. */ 5210a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 522da3a660dSBarry Smith return 0; 523da3a660dSBarry Smith } 524da3a660dSBarry Smith 5251eb62cbbSBarry Smith /* 5261eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5271eb62cbbSBarry Smith diagonal block 5281eb62cbbSBarry Smith */ 529416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5301eb62cbbSBarry Smith { 531416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 53244cd7ae7SLois Curfman McInnes if (a->M != a->N) 53344cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 5345baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 5355baf8537SBarry Smith SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 536416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5371eb62cbbSBarry Smith } 5381eb62cbbSBarry Smith 539052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 540052efed2SBarry Smith { 541052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 542052efed2SBarry Smith int ierr; 543052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 544052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 545052efed2SBarry Smith return 0; 546052efed2SBarry Smith } 547052efed2SBarry Smith 54844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5491eb62cbbSBarry Smith { 5501eb62cbbSBarry Smith Mat mat = (Mat) obj; 55144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5521eb62cbbSBarry Smith int ierr; 553a5a9c739SBarry Smith #if defined(PETSC_LOG) 5546e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 555a5a9c739SBarry Smith #endif 5560452661fSBarry Smith PetscFree(aij->rowners); 55778b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 55878b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5590452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5600452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5611eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 562a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5637a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5640452661fSBarry Smith PetscFree(aij); 565a5a9c739SBarry Smith PLogObjectDestroy(mat); 5660452661fSBarry Smith PetscHeaderDestroy(mat); 5671eb62cbbSBarry Smith return 0; 5681eb62cbbSBarry Smith } 5697857610eSBarry Smith #include "draw.h" 570b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 571ee50ffe9SBarry Smith 572416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5731eb62cbbSBarry Smith { 574416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 575416022c9SBarry Smith int ierr; 576416022c9SBarry Smith 57717699dbbSLois Curfman McInnes if (aij->size == 1) { 578416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 579416022c9SBarry Smith } 580a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 581416022c9SBarry Smith return 0; 582416022c9SBarry Smith } 583416022c9SBarry Smith 584d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 585416022c9SBarry Smith { 58644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 587dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 588a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 589d636dbe3SBarry Smith FILE *fd; 59019bcc07fSBarry Smith ViewerType vtype; 591416022c9SBarry Smith 59219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 59319bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 59490ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 5950a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5964e220ebcSLois Curfman McInnes MatInfo info; 5974e220ebcSLois Curfman McInnes int flg; 598a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 59990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6004e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 60195e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 60277c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 60395e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 6044e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 60595e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 6064e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 6074e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 6084e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 6094e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 6104e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 611a56f8943SBarry Smith fflush(fd); 61277c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 613a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 614416022c9SBarry Smith return 0; 615416022c9SBarry Smith } 6160a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 61708480c60SBarry Smith return 0; 61808480c60SBarry Smith } 619416022c9SBarry Smith } 620416022c9SBarry Smith 62119bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 62219bcc07fSBarry Smith Draw draw; 62319bcc07fSBarry Smith PetscTruth isnull; 62419bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 62519bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 62619bcc07fSBarry Smith } 62719bcc07fSBarry Smith 62819bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 62990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 63077c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 631d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 63217699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6331eb62cbbSBarry Smith aij->cend); 63478b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 63578b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 636d13ab20cSBarry Smith fflush(fd); 63777c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 638d13ab20cSBarry Smith } 639416022c9SBarry Smith else { 640a56f8943SBarry Smith int size = aij->size; 641a56f8943SBarry Smith rank = aij->rank; 64217699dbbSLois Curfman McInnes if (size == 1) { 64378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 64495373324SBarry Smith } 64595373324SBarry Smith else { 64695373324SBarry Smith /* assemble the entire matrix onto first processor. */ 64795373324SBarry Smith Mat A; 648ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6492eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 65095373324SBarry Smith Scalar *a; 6512ee70a88SLois Curfman McInnes 65217699dbbSLois Curfman McInnes if (!rank) { 653b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 654c451ab25SLois Curfman McInnes CHKERRQ(ierr); 65595373324SBarry Smith } 65695373324SBarry Smith else { 657b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 658c451ab25SLois Curfman McInnes CHKERRQ(ierr); 65995373324SBarry Smith } 660464493b3SBarry Smith PLogObjectParent(mat,A); 661416022c9SBarry Smith 66295373324SBarry Smith /* copy over the A part */ 663ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6642ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66595373324SBarry Smith row = aij->rstart; 666dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 66795373324SBarry Smith for ( i=0; i<m; i++ ) { 668416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 66995373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 67095373324SBarry Smith } 6712ee70a88SLois Curfman McInnes aj = Aloc->j; 672dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 67395373324SBarry Smith 67495373324SBarry Smith /* copy over the B part */ 675ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6762ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 67795373324SBarry Smith row = aij->rstart; 6780452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 679dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 68095373324SBarry Smith for ( i=0; i<m; i++ ) { 681416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 68295373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 68395373324SBarry Smith } 6840452661fSBarry Smith PetscFree(ct); 6856d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6866d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 68717699dbbSLois Curfman McInnes if (!rank) { 68878b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 68995373324SBarry Smith } 69078b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 69195373324SBarry Smith } 69295373324SBarry Smith } 6931eb62cbbSBarry Smith return 0; 6941eb62cbbSBarry Smith } 6951eb62cbbSBarry Smith 696416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 697416022c9SBarry Smith { 698416022c9SBarry Smith Mat mat = (Mat) obj; 699416022c9SBarry Smith int ierr; 70019bcc07fSBarry Smith ViewerType vtype; 701416022c9SBarry Smith 70219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 70319bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 70419bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 705d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 706416022c9SBarry Smith } 70719bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 708416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 709416022c9SBarry Smith } 710416022c9SBarry Smith return 0; 711416022c9SBarry Smith } 712416022c9SBarry Smith 7131eb62cbbSBarry Smith /* 7141eb62cbbSBarry Smith This has to provide several versions. 7151eb62cbbSBarry Smith 7161eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7171eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 718d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7191eb62cbbSBarry Smith */ 720fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 721dbb450caSBarry Smith double fshift,int its,Vec xx) 7228a729477SBarry Smith { 72344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 724d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 725ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 726c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7276abc6512SBarry Smith int ierr,*idx, *diag; 728416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7298a729477SBarry Smith 730d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 731dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 732dbb450caSBarry Smith ls = ls + shift; 733ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 734d6dfbf8fSBarry Smith diag = A->diag; 735c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 736da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 73738597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 738da3a660dSBarry Smith } 73964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 74078b31e54SBarry Smith CHKERRQ(ierr); 74164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 74278b31e54SBarry Smith CHKERRQ(ierr); 743d6dfbf8fSBarry Smith while (its--) { 744d6dfbf8fSBarry Smith /* go down through the rows */ 745d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 746d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 747dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 748dbb450caSBarry Smith v = A->a + A->i[i] + shift; 749d6dfbf8fSBarry Smith sum = b[i]; 750d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 751dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 752d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 753dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 754dbb450caSBarry Smith v = B->a + B->i[i] + shift; 755d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 75655a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 757d6dfbf8fSBarry Smith } 758d6dfbf8fSBarry Smith /* come up through the rows */ 759d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 760d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 761dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 762dbb450caSBarry Smith v = A->a + A->i[i] + shift; 763d6dfbf8fSBarry Smith sum = b[i]; 764d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 765dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 766d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 767dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 768dbb450caSBarry Smith v = B->a + B->i[i] + shift; 769d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 77055a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 771d6dfbf8fSBarry Smith } 772d6dfbf8fSBarry Smith } 773d6dfbf8fSBarry Smith } 774d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 775da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 77638597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 777da3a660dSBarry Smith } 77864119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 77978b31e54SBarry Smith CHKERRQ(ierr); 78064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 78178b31e54SBarry Smith CHKERRQ(ierr); 782d6dfbf8fSBarry Smith while (its--) { 783d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 784d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 785dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 786dbb450caSBarry Smith v = A->a + A->i[i] + shift; 787d6dfbf8fSBarry Smith sum = b[i]; 788d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 789dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 790d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 791dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 792dbb450caSBarry Smith v = B->a + B->i[i] + shift; 793d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 79455a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 795d6dfbf8fSBarry Smith } 796d6dfbf8fSBarry Smith } 797d6dfbf8fSBarry Smith } 798d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 799da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 80038597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 801da3a660dSBarry Smith } 80264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 80378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 80464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 80578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 806d6dfbf8fSBarry Smith while (its--) { 807d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 808d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 809dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 810dbb450caSBarry Smith v = A->a + A->i[i] + shift; 811d6dfbf8fSBarry Smith sum = b[i]; 812d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 813dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 814d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 815dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 816dbb450caSBarry Smith v = B->a + B->i[i] + shift; 817d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 81855a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 819d6dfbf8fSBarry Smith } 820d6dfbf8fSBarry Smith } 821d6dfbf8fSBarry Smith } 822c16cb8f2SBarry Smith else { 823c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 824c16cb8f2SBarry Smith } 8258a729477SBarry Smith return 0; 8268a729477SBarry Smith } 827a66be287SLois Curfman McInnes 8284e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 829a66be287SLois Curfman McInnes { 830a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 831a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 8324e220ebcSLois Curfman McInnes int ierr; 8334e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 834a66be287SLois Curfman McInnes 8354e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 8364e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 8374e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 8384e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 8394e220ebcSLois Curfman McInnes info->block_size = 1.0; 8404e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 8414e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 8424e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 8434e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 8444e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 8454e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 846a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 8474e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 8484e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 8494e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 8504e220ebcSLois Curfman McInnes info->memory = isend[3]; 8514e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 852a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 8534e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 8544e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8554e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8564e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8574e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8584e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 859a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 8604e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 8614e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8624e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8634e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8644e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8654e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 866a66be287SLois Curfman McInnes } 8674e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8684e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 8694e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 8704e220ebcSLois Curfman McInnes 871a66be287SLois Curfman McInnes return 0; 872a66be287SLois Curfman McInnes } 873a66be287SLois Curfman McInnes 874299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 875299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 876299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 877299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 878299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 879299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 880299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 881299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 882299609e3SLois Curfman McInnes 883416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 884c74985f6SBarry Smith { 885c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 886c74985f6SBarry Smith 8876d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 8886d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 8896d4a8577SBarry Smith op == MAT_COLUMNS_SORTED || 8906d4a8577SBarry Smith op == MAT_ROW_ORIENTED) { 891c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 892c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 893c74985f6SBarry Smith } 8946d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 8956d4a8577SBarry Smith op == MAT_SYMMETRIC || 8966d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 8976d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 89894a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 8996d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 9004b0e389bSBarry Smith a->roworiented = 0; 9014b0e389bSBarry Smith MatSetOption(a->A,op); 9024b0e389bSBarry Smith MatSetOption(a->B,op); 9034b0e389bSBarry Smith } 9046d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 9056d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 906c0bbcb79SLois Curfman McInnes else 9074d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 908c74985f6SBarry Smith return 0; 909c74985f6SBarry Smith } 910c74985f6SBarry Smith 911d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 912c74985f6SBarry Smith { 91344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 914c74985f6SBarry Smith *m = mat->M; *n = mat->N; 915c74985f6SBarry Smith return 0; 916c74985f6SBarry Smith } 917c74985f6SBarry Smith 918d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 919c74985f6SBarry Smith { 92044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 921b7c46309SBarry Smith *m = mat->m; *n = mat->N; 922c74985f6SBarry Smith return 0; 923c74985f6SBarry Smith } 924c74985f6SBarry Smith 925d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 926c74985f6SBarry Smith { 92744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 928c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 929c74985f6SBarry Smith return 0; 930c74985f6SBarry Smith } 931c74985f6SBarry Smith 9326d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9336d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9346d84be18SBarry Smith 9356d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 93639e00950SLois Curfman McInnes { 937154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 93870f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 939154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 940154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 94170f0671dSBarry Smith int *cmap, *idx_p; 94239e00950SLois Curfman McInnes 9437a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 9447a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 9457a0afa10SBarry Smith 94670f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 9477a0afa10SBarry Smith /* 9487a0afa10SBarry Smith allocate enough space to hold information from the longest row. 9497a0afa10SBarry Smith */ 9507a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 951c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 952c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 9537a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 9547a0afa10SBarry Smith if (max < tmp) { max = tmp; } 9557a0afa10SBarry Smith } 9567a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 9577a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 9587a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 9597a0afa10SBarry Smith } 9607a0afa10SBarry Smith 961b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 962abc0e9e4SLois Curfman McInnes lrow = row - rstart; 96339e00950SLois Curfman McInnes 964154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 965154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 966154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 96738597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 96838597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 969154123eaSLois Curfman McInnes nztot = nzA + nzB; 970154123eaSLois Curfman McInnes 97170f0671dSBarry Smith cmap = mat->garray; 972154123eaSLois Curfman McInnes if (v || idx) { 973154123eaSLois Curfman McInnes if (nztot) { 974154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 97570f0671dSBarry Smith int imark = -1; 976154123eaSLois Curfman McInnes if (v) { 97770f0671dSBarry Smith *v = v_p = mat->rowvalues; 97839e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 97970f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 980154123eaSLois Curfman McInnes else break; 981154123eaSLois Curfman McInnes } 982154123eaSLois Curfman McInnes imark = i; 98370f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 98470f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 985154123eaSLois Curfman McInnes } 986154123eaSLois Curfman McInnes if (idx) { 98770f0671dSBarry Smith *idx = idx_p = mat->rowindices; 98870f0671dSBarry Smith if (imark > -1) { 98970f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 99070f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 99170f0671dSBarry Smith } 99270f0671dSBarry Smith } else { 993154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 99470f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 995154123eaSLois Curfman McInnes else break; 996154123eaSLois Curfman McInnes } 997154123eaSLois Curfman McInnes imark = i; 99870f0671dSBarry Smith } 99970f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 100070f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 100139e00950SLois Curfman McInnes } 100239e00950SLois Curfman McInnes } 10031ca473b0SSatish Balay else { 10041ca473b0SSatish Balay if (idx) *idx = 0; 10051ca473b0SSatish Balay if (v) *v = 0; 10061ca473b0SSatish Balay } 1007154123eaSLois Curfman McInnes } 100839e00950SLois Curfman McInnes *nz = nztot; 100938597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 101038597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 101139e00950SLois Curfman McInnes return 0; 101239e00950SLois Curfman McInnes } 101339e00950SLois Curfman McInnes 10146d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 101539e00950SLois Curfman McInnes { 10167a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 10177a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 10187a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 10197a0afa10SBarry Smith } 10207a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 102139e00950SLois Curfman McInnes return 0; 102239e00950SLois Curfman McInnes } 102339e00950SLois Curfman McInnes 1024cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1025855ac2c5SLois Curfman McInnes { 1026855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1027ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1028416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1029416022c9SBarry Smith double sum = 0.0; 103004ca555eSLois Curfman McInnes Scalar *v; 103104ca555eSLois Curfman McInnes 103217699dbbSLois Curfman McInnes if (aij->size == 1) { 103314183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 103437fa93a5SLois Curfman McInnes } else { 103504ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 103604ca555eSLois Curfman McInnes v = amat->a; 103704ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 103804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 103904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 104004ca555eSLois Curfman McInnes #else 104104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 104204ca555eSLois Curfman McInnes #endif 104304ca555eSLois Curfman McInnes } 104404ca555eSLois Curfman McInnes v = bmat->a; 104504ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 104604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 104704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 104804ca555eSLois Curfman McInnes #else 104904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 105004ca555eSLois Curfman McInnes #endif 105104ca555eSLois Curfman McInnes } 10526d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 105304ca555eSLois Curfman McInnes *norm = sqrt(*norm); 105404ca555eSLois Curfman McInnes } 105504ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 105604ca555eSLois Curfman McInnes double *tmp, *tmp2; 105704ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 10580452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 10590452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1060cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 106104ca555eSLois Curfman McInnes *norm = 0.0; 106204ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 106304ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1064579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 106504ca555eSLois Curfman McInnes } 106604ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 106704ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1068579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 106904ca555eSLois Curfman McInnes } 10706d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 107104ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 107204ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 107304ca555eSLois Curfman McInnes } 10740452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 107504ca555eSLois Curfman McInnes } 107604ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1077515d9167SLois Curfman McInnes double ntemp = 0.0; 107804ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1079dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 108004ca555eSLois Curfman McInnes sum = 0.0; 108104ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1082cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 108304ca555eSLois Curfman McInnes } 1084dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 108504ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1086cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 108704ca555eSLois Curfman McInnes } 1088515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 108904ca555eSLois Curfman McInnes } 10906d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 109104ca555eSLois Curfman McInnes } 109204ca555eSLois Curfman McInnes else { 1093515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 109404ca555eSLois Curfman McInnes } 109537fa93a5SLois Curfman McInnes } 1096855ac2c5SLois Curfman McInnes return 0; 1097855ac2c5SLois Curfman McInnes } 1098855ac2c5SLois Curfman McInnes 10990de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1100b7c46309SBarry Smith { 1101b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1102dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1103416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1104416022c9SBarry Smith Mat B; 1105b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1106b7c46309SBarry Smith Scalar *array; 1107b7c46309SBarry Smith 11083638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 11093638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1110b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1111b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1112b7c46309SBarry Smith 1113b7c46309SBarry Smith /* copy over the A part */ 1114ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1115b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1116b7c46309SBarry Smith row = a->rstart; 1117dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1118b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1119416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1120b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1121b7c46309SBarry Smith } 1122b7c46309SBarry Smith aj = Aloc->j; 11234af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1124b7c46309SBarry Smith 1125b7c46309SBarry Smith /* copy over the B part */ 1126ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1127b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1128b7c46309SBarry Smith row = a->rstart; 11290452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1130dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1131b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1132416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1133b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1134b7c46309SBarry Smith } 11350452661fSBarry Smith PetscFree(ct); 11366d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11376d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11383638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 11390de55854SLois Curfman McInnes *matout = B; 11400de55854SLois Curfman McInnes } else { 11410de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 11420452661fSBarry Smith PetscFree(a->rowners); 11430de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 11440de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 11450452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 11460452661fSBarry Smith if (a->garray) PetscFree(a->garray); 11470de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1148a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 11490452661fSBarry Smith PetscFree(a); 1150416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 11510452661fSBarry Smith PetscHeaderDestroy(B); 11520de55854SLois Curfman McInnes } 1153b7c46309SBarry Smith return 0; 1154b7c46309SBarry Smith } 1155b7c46309SBarry Smith 11564b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1157a008b906SSatish Balay { 11584b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 11594b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1160a008b906SSatish Balay int ierr,s1,s2,s3; 1161a008b906SSatish Balay 11624b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 11634b967eb1SSatish Balay if (rr) { 11644b967eb1SSatish Balay s3 = aij->n; 11654b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 11664b967eb1SSatish Balay if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size"); 11674b967eb1SSatish Balay /* Overlap communication with computation. */ 11684b967eb1SSatish Balay ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr); 1169a008b906SSatish Balay } 11704b967eb1SSatish Balay if (ll) { 11714b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 11724b967eb1SSatish Balay if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size"); 1173*c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 11744b967eb1SSatish Balay } 11754b967eb1SSatish Balay /* scale the diagonal block */ 1176*c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 11774b967eb1SSatish Balay 11784b967eb1SSatish Balay if (rr) { 11794b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 11804b967eb1SSatish Balay ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr); 1181*c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 11824b967eb1SSatish Balay } 11834b967eb1SSatish Balay 1184a008b906SSatish Balay return 0; 1185a008b906SSatish Balay } 1186a008b906SSatish Balay 1187a008b906SSatish Balay 1188682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1189682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1190682d7d0cSBarry Smith { 1191682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1192682d7d0cSBarry Smith 1193682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1194682d7d0cSBarry Smith else return 0; 1195682d7d0cSBarry Smith } 1196682d7d0cSBarry Smith 11975a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 11985a838052SSatish Balay { 11995a838052SSatish Balay *bs = 1; 12005a838052SSatish Balay return 0; 12015a838052SSatish Balay } 12025a838052SSatish Balay 1203fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12043d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12052f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 12060a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 12070a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 12088a729477SBarry Smith /* -------------------------------------------------------------------*/ 12092ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 121039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 121144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 121244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 121336ce4990SBarry Smith 0,0, 121436ce4990SBarry Smith 0,0, 121536ce4990SBarry Smith 0,0, 121644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1217b7c46309SBarry Smith MatTranspose_MPIAIJ, 1218a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1219a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1220ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12211eb62cbbSBarry Smith 0, 1222299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 122336ce4990SBarry Smith 0,0,0,0, 1224d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 122536ce4990SBarry Smith 0,0, 12263e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1227b49de8d1SLois Curfman McInnes 0,0,0, 1228598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1229052efed2SBarry Smith MatPrintHelp_MPIAIJ, 12303b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 12310a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 12320a198c4cSBarry Smith MatFDColoringCreate_MPIAIJ}; 123336ce4990SBarry Smith 12348a729477SBarry Smith 12351987afe7SBarry Smith /*@C 1236ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 12373a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 12383a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 12393a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 12403a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 12418a729477SBarry Smith 12428a729477SBarry Smith Input Parameters: 12431eb62cbbSBarry Smith . comm - MPI communicator 12447d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 124592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 124692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 124792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 124892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 124992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 12507d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 125192e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1252ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1253ff756334SLois Curfman McInnes (same for all local rows) 12542bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 125592e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 12562bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 12572bd5e0b2SLois Curfman McInnes it is zero. 12582bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1259ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 12602bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 12612bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 12622bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 12638a729477SBarry Smith 1264ff756334SLois Curfman McInnes Output Parameter: 126544cd7ae7SLois Curfman McInnes . A - the matrix 12668a729477SBarry Smith 1267ff756334SLois Curfman McInnes Notes: 1268ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1269ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12700002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12710002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1272ff756334SLois Curfman McInnes 1273ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1274ff756334SLois Curfman McInnes (possibly both). 1275ff756334SLois Curfman McInnes 12765511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 12775511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 12785511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 12795511cfe3SLois Curfman McInnes 12805511cfe3SLois Curfman McInnes Options Database Keys: 12815511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 12826e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 12836e62573dSLois Curfman McInnes $ (max limit=5) 12846e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 12856e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 12866e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 12875511cfe3SLois Curfman McInnes 1288e0245417SLois Curfman McInnes Storage Information: 1289e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1290e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1291e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1292e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1293e0245417SLois Curfman McInnes 1294e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 12955ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 12965ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 12975ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 12985ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1299ff756334SLois Curfman McInnes 13005511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 13015511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 13022191d07cSBarry Smith 1303b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1304b810aeb4SBarry Smith $ ------------------- 13055511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 13065511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 13075511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 13085511cfe3SLois Curfman McInnes $ ------------------- 1309b810aeb4SBarry Smith $ 1310b810aeb4SBarry Smith 13115511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 13125511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 13135511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 13145511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 13155511cfe3SLois Curfman McInnes 13165511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 13175511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 13185511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 13193d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 132092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 13213d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 13223a511b96SLois Curfman McInnes 1323dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1324ff756334SLois Curfman McInnes 1325fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13268a729477SBarry Smith @*/ 13271eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 132844cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 13298a729477SBarry Smith { 133044cd7ae7SLois Curfman McInnes Mat B; 133144cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 133236ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1333416022c9SBarry Smith 133444cd7ae7SLois Curfman McInnes *A = 0; 133544cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 133644cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 133744cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 133844cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 133944cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 134036ce4990SBarry Smith MPI_Comm_size(comm,&size); 134136ce4990SBarry Smith if (size == 1) { 134236ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 134336ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 134436ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 134536ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 134636ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 134736ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 134836ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 134936ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 135036ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 135136ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 135236ce4990SBarry Smith } 135344cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 135444cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 135544cd7ae7SLois Curfman McInnes B->factor = 0; 135644cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 1357d6dfbf8fSBarry Smith 135844cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 135944cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 136044cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 13611eb62cbbSBarry Smith 1362b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 13632e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 13641987afe7SBarry Smith 1365dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13661eb62cbbSBarry Smith work[0] = m; work[1] = n; 1367d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1368dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1369dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13701eb62cbbSBarry Smith } 137144cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 137244cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 137344cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 137444cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 137544cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 137644cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 13771eb62cbbSBarry Smith 13781eb62cbbSBarry Smith /* build local table of row and column ownerships */ 137944cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 138044cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1381603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 138244cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 138344cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 138444cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 138544cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 13868a729477SBarry Smith } 138744cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 138844cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 138944cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 139044cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 139144cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 139244cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 13938a729477SBarry Smith } 139444cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 139544cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 13968a729477SBarry Smith 13975ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 139844cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 139944cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 14007b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 140144cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 140244cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 14038a729477SBarry Smith 14041eb62cbbSBarry Smith /* build cache for off array entries formed */ 140544cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 140644cd7ae7SLois Curfman McInnes b->colmap = 0; 140744cd7ae7SLois Curfman McInnes b->garray = 0; 140844cd7ae7SLois Curfman McInnes b->roworiented = 1; 14098a729477SBarry Smith 14101eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 141144cd7ae7SLois Curfman McInnes b->lvec = 0; 141244cd7ae7SLois Curfman McInnes b->Mvctx = 0; 14138a729477SBarry Smith 14147a0afa10SBarry Smith /* stuff for MatGetRow() */ 141544cd7ae7SLois Curfman McInnes b->rowindices = 0; 141644cd7ae7SLois Curfman McInnes b->rowvalues = 0; 141744cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 14187a0afa10SBarry Smith 141944cd7ae7SLois Curfman McInnes *A = B; 1420d6dfbf8fSBarry Smith return 0; 1421d6dfbf8fSBarry Smith } 1422c74985f6SBarry Smith 14233d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1424d6dfbf8fSBarry Smith { 1425d6dfbf8fSBarry Smith Mat mat; 1426416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1427a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1428d6dfbf8fSBarry Smith 1429416022c9SBarry Smith *newmat = 0; 14300452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1431a5a9c739SBarry Smith PLogObjectCreate(mat); 14320452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1433416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 143444a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 143544a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1436d6dfbf8fSBarry Smith mat->factor = matin->factor; 1437c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1438d6dfbf8fSBarry Smith 143944cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 144044cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 144144cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 144244cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1443d6dfbf8fSBarry Smith 1444416022c9SBarry Smith a->rstart = oldmat->rstart; 1445416022c9SBarry Smith a->rend = oldmat->rend; 1446416022c9SBarry Smith a->cstart = oldmat->cstart; 1447416022c9SBarry Smith a->cend = oldmat->cend; 144817699dbbSLois Curfman McInnes a->size = oldmat->size; 144917699dbbSLois Curfman McInnes a->rank = oldmat->rank; 145064119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1451bcd2baecSBarry Smith a->rowvalues = 0; 1452bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1453d6dfbf8fSBarry Smith 1454603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1455603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1456603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1457603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1458416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14592ee70a88SLois Curfman McInnes if (oldmat->colmap) { 14600452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1461416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1462416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1463416022c9SBarry Smith } else a->colmap = 0; 14643f41c07dSBarry Smith if (oldmat->garray) { 14653f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 14663f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1467464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 14683f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1469416022c9SBarry Smith } else a->garray = 0; 1470d6dfbf8fSBarry Smith 1471416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1472416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1473a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1474416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1475416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1476416022c9SBarry Smith PLogObjectParent(mat,a->A); 1477416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1478416022c9SBarry Smith PLogObjectParent(mat,a->B); 14795dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 148025cdf11fSBarry Smith if (flg) { 1481682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1482682d7d0cSBarry Smith } 14838a729477SBarry Smith *newmat = mat; 14848a729477SBarry Smith return 0; 14858a729477SBarry Smith } 1486416022c9SBarry Smith 148777c4ece6SBarry Smith #include "sys.h" 1488416022c9SBarry Smith 148919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1490416022c9SBarry Smith { 1491d65a2f8fSBarry Smith Mat A; 1492416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1493d65a2f8fSBarry Smith Scalar *vals,*svals; 149419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1495416022c9SBarry Smith MPI_Status status; 149617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1497d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 149819bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1499416022c9SBarry Smith 150017699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 150117699dbbSLois Curfman McInnes if (!rank) { 150290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 150377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1504c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1505416022c9SBarry Smith } 1506416022c9SBarry Smith 1507416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1508416022c9SBarry Smith M = header[1]; N = header[2]; 1509416022c9SBarry Smith /* determine ownership of all rows */ 151017699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15110452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1512416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1513416022c9SBarry Smith rowners[0] = 0; 151417699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1515416022c9SBarry Smith rowners[i] += rowners[i-1]; 1516416022c9SBarry Smith } 151717699dbbSLois Curfman McInnes rstart = rowners[rank]; 151817699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1519416022c9SBarry Smith 1520416022c9SBarry Smith /* distribute row lengths to all processors */ 15210452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1522416022c9SBarry Smith offlens = ourlens + (rend-rstart); 152317699dbbSLois Curfman McInnes if (!rank) { 15240452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 152577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 15260452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 152717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1528d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15290452661fSBarry Smith PetscFree(sndcounts); 1530416022c9SBarry Smith } 1531416022c9SBarry Smith else { 1532416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1533416022c9SBarry Smith } 1534416022c9SBarry Smith 153517699dbbSLois Curfman McInnes if (!rank) { 1536416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15370452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1538cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 153917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1540416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1541416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1542416022c9SBarry Smith } 1543416022c9SBarry Smith } 15440452661fSBarry Smith PetscFree(rowlengths); 1545416022c9SBarry Smith 1546416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1547416022c9SBarry Smith maxnz = 0; 154817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15490452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1550416022c9SBarry Smith } 15510452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1552416022c9SBarry Smith 1553416022c9SBarry Smith /* read in my part of the matrix column indices */ 1554416022c9SBarry Smith nz = procsnz[0]; 15550452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 155677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1557d65a2f8fSBarry Smith 1558d65a2f8fSBarry Smith /* read in every one elses and ship off */ 155917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1560d65a2f8fSBarry Smith nz = procsnz[i]; 156177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1562d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1563d65a2f8fSBarry Smith } 15640452661fSBarry Smith PetscFree(cols); 1565416022c9SBarry Smith } 1566416022c9SBarry Smith else { 1567416022c9SBarry Smith /* determine buffer space needed for message */ 1568416022c9SBarry Smith nz = 0; 1569416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1570416022c9SBarry Smith nz += ourlens[i]; 1571416022c9SBarry Smith } 15720452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1573416022c9SBarry Smith 1574416022c9SBarry Smith /* receive message of column indices*/ 1575d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1576416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1577c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1578416022c9SBarry Smith } 1579416022c9SBarry Smith 1580416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1581cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1582416022c9SBarry Smith jj = 0; 1583416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1584416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1585d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1586416022c9SBarry Smith jj++; 1587416022c9SBarry Smith } 1588416022c9SBarry Smith } 1589d65a2f8fSBarry Smith 1590d65a2f8fSBarry Smith /* create our matrix */ 1591416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1592416022c9SBarry Smith ourlens[i] -= offlens[i]; 1593416022c9SBarry Smith } 1594d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1595d65a2f8fSBarry Smith A = *newmat; 15966d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1597d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1598d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1599d65a2f8fSBarry Smith } 1600416022c9SBarry Smith 160117699dbbSLois Curfman McInnes if (!rank) { 16020452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1603416022c9SBarry Smith 1604416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1605416022c9SBarry Smith nz = procsnz[0]; 160677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1607d65a2f8fSBarry Smith 1608d65a2f8fSBarry Smith /* insert into matrix */ 1609d65a2f8fSBarry Smith jj = rstart; 1610d65a2f8fSBarry Smith smycols = mycols; 1611d65a2f8fSBarry Smith svals = vals; 1612d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1613d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1614d65a2f8fSBarry Smith smycols += ourlens[i]; 1615d65a2f8fSBarry Smith svals += ourlens[i]; 1616d65a2f8fSBarry Smith jj++; 1617416022c9SBarry Smith } 1618416022c9SBarry Smith 1619d65a2f8fSBarry Smith /* read in other processors and ship out */ 162017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1621416022c9SBarry Smith nz = procsnz[i]; 162277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1623d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1624416022c9SBarry Smith } 16250452661fSBarry Smith PetscFree(procsnz); 1626416022c9SBarry Smith } 1627d65a2f8fSBarry Smith else { 1628d65a2f8fSBarry Smith /* receive numeric values */ 16290452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1630416022c9SBarry Smith 1631d65a2f8fSBarry Smith /* receive message of values*/ 1632d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1633d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1634c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1635d65a2f8fSBarry Smith 1636d65a2f8fSBarry Smith /* insert into matrix */ 1637d65a2f8fSBarry Smith jj = rstart; 1638d65a2f8fSBarry Smith smycols = mycols; 1639d65a2f8fSBarry Smith svals = vals; 1640d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1641d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1642d65a2f8fSBarry Smith smycols += ourlens[i]; 1643d65a2f8fSBarry Smith svals += ourlens[i]; 1644d65a2f8fSBarry Smith jj++; 1645d65a2f8fSBarry Smith } 1646d65a2f8fSBarry Smith } 16470452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1648d65a2f8fSBarry Smith 16496d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 16506d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1651416022c9SBarry Smith return 0; 1652416022c9SBarry Smith } 1653