1cb512458SBarry Smith #ifndef lint 2*43a90d84SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.176 1996/11/20 19:59:58 curfman Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 140a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18905e6a2fSBarry Smith int n = B->n,i; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23905e6a2fSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 293b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 303b2fbd54SBarry Smith PetscTruth *done) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 3417699dbbSLois Curfman McInnes if (aij->size == 1) { 353b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 363b2fbd54SBarry Smith } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel"); 373b2fbd54SBarry Smith return 0; 383b2fbd54SBarry Smith } 393b2fbd54SBarry Smith 403b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 413b2fbd54SBarry Smith PetscTruth *done) 423b2fbd54SBarry Smith { 433b2fbd54SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 443b2fbd54SBarry Smith int ierr; 453b2fbd54SBarry Smith if (aij->size == 1) { 463b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 473b2fbd54SBarry Smith } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel"); 48299609e3SLois Curfman McInnes return 0; 49299609e3SLois Curfman McInnes } 50299609e3SLois Curfman McInnes 510a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 520a198c4cSBarry Smith 534b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 548a729477SBarry Smith { 5544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 564b0e389bSBarry Smith Scalar value; 571eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 581eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 59905e6a2fSBarry Smith int roworiented = aij->roworiented; 608a729477SBarry Smith 610a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 6264119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 6348d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 648a729477SBarry Smith } 650a198c4cSBarry Smith #endif 661eb62cbbSBarry Smith aij->insertmode = addv; 678a729477SBarry Smith for ( i=0; i<m; i++ ) { 680a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 694b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 704b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 710a198c4cSBarry Smith #endif 724b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 734b0e389bSBarry Smith row = im[i] - rstart; 741eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 754b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 764b0e389bSBarry Smith col = in[j] - cstart; 774b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 780a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 791eb62cbbSBarry Smith } 800a198c4cSBarry Smith #if defined(PETSC_BOPT_g) 810a198c4cSBarry Smith else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");} 820a198c4cSBarry Smith else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");} 830a198c4cSBarry Smith #endif 841eb62cbbSBarry Smith else { 85227d817aSBarry Smith if (mat->was_assembled) { 86905e6a2fSBarry Smith if (!aij->colmap) { 87905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 88905e6a2fSBarry Smith } 89905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 90ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 912493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 924b0e389bSBarry Smith col = in[j]; 93d6dfbf8fSBarry Smith } 949e25ed09SBarry Smith } 954b0e389bSBarry Smith else col = in[j]; 964b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 970a198c4cSBarry Smith ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 981eb62cbbSBarry Smith } 991eb62cbbSBarry Smith } 1001eb62cbbSBarry Smith } 1011eb62cbbSBarry Smith else { 10290f02eecSBarry Smith if (roworiented && !aij->donotstash) { 1034b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 1044b0e389bSBarry Smith } 1054b0e389bSBarry Smith else { 10690f02eecSBarry Smith if (!aij->donotstash) { 1074b0e389bSBarry Smith row = im[i]; 1084b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 1094b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 1104b0e389bSBarry Smith } 1114b0e389bSBarry Smith } 1121eb62cbbSBarry Smith } 1138a729477SBarry Smith } 11490f02eecSBarry Smith } 1158a729477SBarry Smith return 0; 1168a729477SBarry Smith } 1178a729477SBarry Smith 118b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 119b49de8d1SLois Curfman McInnes { 120b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 121b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 122b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 123b49de8d1SLois Curfman McInnes 124b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 125b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 126b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 127b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 128b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 129b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 130b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 131b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 132b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 133b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 134b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 135b49de8d1SLois Curfman McInnes } 136b49de8d1SLois Curfman McInnes else { 137905e6a2fSBarry Smith if (!aij->colmap) { 138905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 139905e6a2fSBarry Smith } 140905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 141e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 142d9d09a02SSatish Balay else { 143b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 144b49de8d1SLois Curfman McInnes } 145b49de8d1SLois Curfman McInnes } 146b49de8d1SLois Curfman McInnes } 147d9d09a02SSatish Balay } 148b49de8d1SLois Curfman McInnes else { 149b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 150b49de8d1SLois Curfman McInnes } 151b49de8d1SLois Curfman McInnes } 152b49de8d1SLois Curfman McInnes return 0; 153b49de8d1SLois Curfman McInnes } 154b49de8d1SLois Curfman McInnes 155fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1568a729477SBarry Smith { 15744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 158d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 15917699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 16017699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1611eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1626abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1631eb62cbbSBarry Smith InsertMode addv; 1641eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1651eb62cbbSBarry Smith 1661eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 167d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 168dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 169bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1701eb62cbbSBarry Smith } 1711eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1721eb62cbbSBarry Smith 1731eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1740452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 175cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1760452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1771eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1781eb62cbbSBarry Smith idx = aij->stash.idx[i]; 17917699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1801eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1811eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1828a729477SBarry Smith } 1838a729477SBarry Smith } 1848a729477SBarry Smith } 18517699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1861eb62cbbSBarry Smith 1871eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1880452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 18917699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 19017699dbbSLois Curfman McInnes nreceives = work[rank]; 19117699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 19217699dbbSLois Curfman McInnes nmax = work[rank]; 1930452661fSBarry Smith PetscFree(work); 1941eb62cbbSBarry Smith 1951eb62cbbSBarry Smith /* post receives: 1961eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1971eb62cbbSBarry Smith (global index,value) we store the global index as a double 198d6dfbf8fSBarry Smith to simplify the message passing. 1991eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 2001eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 2011eb62cbbSBarry Smith this is a lot of wasted space. 2021eb62cbbSBarry Smith 2031eb62cbbSBarry Smith 2041eb62cbbSBarry Smith This could be done better. 2051eb62cbbSBarry Smith */ 2060452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 20778b31e54SBarry Smith CHKPTRQ(rvalues); 2080452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 20978b31e54SBarry Smith CHKPTRQ(recv_waits); 2101eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 211416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 2121eb62cbbSBarry Smith comm,recv_waits+i); 2131eb62cbbSBarry Smith } 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith /* do sends: 2161eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 2171eb62cbbSBarry Smith the ith processor 2181eb62cbbSBarry Smith */ 2190452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 2200452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 22178b31e54SBarry Smith CHKPTRQ(send_waits); 2220452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2231eb62cbbSBarry Smith starts[0] = 0; 22417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2251eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2261eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2271eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2281eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2291eb62cbbSBarry Smith } 2300452661fSBarry Smith PetscFree(owner); 2311eb62cbbSBarry Smith starts[0] = 0; 23217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2331eb62cbbSBarry Smith count = 0; 23417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2351eb62cbbSBarry Smith if (procs[i]) { 236416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2371eb62cbbSBarry Smith comm,send_waits+count++); 2381eb62cbbSBarry Smith } 2391eb62cbbSBarry Smith } 2400452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2411eb62cbbSBarry Smith 2421eb62cbbSBarry Smith /* Free cache space */ 2432191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 24478b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2451eb62cbbSBarry Smith 2461eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2471eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2481eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2491eb62cbbSBarry Smith aij->rmax = nmax; 2501eb62cbbSBarry Smith 2511eb62cbbSBarry Smith return 0; 2521eb62cbbSBarry Smith } 25344a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2541eb62cbbSBarry Smith 255fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2561eb62cbbSBarry Smith { 25744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 2581eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 259416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 260905e6a2fSBarry Smith int row,col,other_disassembled; 2611eb62cbbSBarry Smith Scalar *values,val; 2621eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2631eb62cbbSBarry Smith 2641eb62cbbSBarry Smith /* wait on receives */ 2651eb62cbbSBarry Smith while (count) { 266d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2671eb62cbbSBarry Smith /* unpack receives into our local space */ 268d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 269c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2701eb62cbbSBarry Smith n = n/3; 2711eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 272227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 273227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2741eb62cbbSBarry Smith val = values[3*i+2]; 2751eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2761eb62cbbSBarry Smith col -= aij->cstart; 2771eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2781eb62cbbSBarry Smith } 2791eb62cbbSBarry Smith else { 28055a1b374SBarry Smith if (mat->was_assembled || mat->assembled) { 281905e6a2fSBarry Smith if (!aij->colmap) { 282905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 283905e6a2fSBarry Smith } 284905e6a2fSBarry Smith col = aij->colmap[col] - 1; 285ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2862493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 288d6dfbf8fSBarry Smith } 2899e25ed09SBarry Smith } 2901eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2911eb62cbbSBarry Smith } 2921eb62cbbSBarry Smith } 2931eb62cbbSBarry Smith count--; 2941eb62cbbSBarry Smith } 2950452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2961eb62cbbSBarry Smith 2971eb62cbbSBarry Smith /* wait on sends */ 2981eb62cbbSBarry Smith if (aij->nsends) { 2990a198c4cSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3001eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 3010452661fSBarry Smith PetscFree(send_status); 3021eb62cbbSBarry Smith } 3030452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 3041eb62cbbSBarry Smith 30564119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 30678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 30778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 3081eb62cbbSBarry Smith 3092493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 3102493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 311227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 312227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 3132493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 3142493cbb0SBarry Smith } 3152493cbb0SBarry Smith 3166d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 31778b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 3185e42470aSBarry Smith } 31978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 32078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 3215e42470aSBarry Smith 3227a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3238a729477SBarry Smith return 0; 3248a729477SBarry Smith } 3258a729477SBarry Smith 3262ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3271eb62cbbSBarry Smith { 32844a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 329dbd7a890SLois Curfman McInnes int ierr; 33078b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 33178b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3321eb62cbbSBarry Smith return 0; 3331eb62cbbSBarry Smith } 3341eb62cbbSBarry Smith 3351eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3361eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3371eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3381eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3391eb62cbbSBarry Smith routine. 3401eb62cbbSBarry Smith */ 34144a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3421eb62cbbSBarry Smith { 34344a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 34417699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3456abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 34617699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3475392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 348d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 349d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3501eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3511eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3521eb62cbbSBarry Smith IS istmp; 3531eb62cbbSBarry Smith 35477c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 35578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3561eb62cbbSBarry Smith 3571eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3580452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 359cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3600452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3611eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3621eb62cbbSBarry Smith idx = rows[i]; 3631eb62cbbSBarry Smith found = 0; 36417699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3651eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3661eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3671eb62cbbSBarry Smith } 3681eb62cbbSBarry Smith } 369bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3701eb62cbbSBarry Smith } 37117699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3721eb62cbbSBarry Smith 3731eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3740452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 37517699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 37617699dbbSLois Curfman McInnes nrecvs = work[rank]; 37717699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 37817699dbbSLois Curfman McInnes nmax = work[rank]; 3790452661fSBarry Smith PetscFree(work); 3801eb62cbbSBarry Smith 3811eb62cbbSBarry Smith /* post receives: */ 3820452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 38378b31e54SBarry Smith CHKPTRQ(rvalues); 3840452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 38578b31e54SBarry Smith CHKPTRQ(recv_waits); 3861eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 387416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3881eb62cbbSBarry Smith } 3891eb62cbbSBarry Smith 3901eb62cbbSBarry Smith /* do sends: 3911eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3921eb62cbbSBarry Smith the ith processor 3931eb62cbbSBarry Smith */ 3940452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3950452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 39678b31e54SBarry Smith CHKPTRQ(send_waits); 3970452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3981eb62cbbSBarry Smith starts[0] = 0; 39917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4001eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 4011eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 4021eb62cbbSBarry Smith } 4031eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 4041eb62cbbSBarry Smith 4051eb62cbbSBarry Smith starts[0] = 0; 40617699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 4071eb62cbbSBarry Smith count = 0; 40817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 4091eb62cbbSBarry Smith if (procs[i]) { 410416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 4111eb62cbbSBarry Smith } 4121eb62cbbSBarry Smith } 4130452661fSBarry Smith PetscFree(starts); 4141eb62cbbSBarry Smith 41517699dbbSLois Curfman McInnes base = owners[rank]; 4161eb62cbbSBarry Smith 4171eb62cbbSBarry Smith /* wait on receives */ 4180452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 4191eb62cbbSBarry Smith source = lens + nrecvs; 4201eb62cbbSBarry Smith count = nrecvs; slen = 0; 4211eb62cbbSBarry Smith while (count) { 422d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4231eb62cbbSBarry Smith /* unpack receives into our local space */ 4241eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 425d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 426d6dfbf8fSBarry Smith lens[imdex] = n; 4271eb62cbbSBarry Smith slen += n; 4281eb62cbbSBarry Smith count--; 4291eb62cbbSBarry Smith } 4300452661fSBarry Smith PetscFree(recv_waits); 4311eb62cbbSBarry Smith 4321eb62cbbSBarry Smith /* move the data into the send scatter */ 4330452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4341eb62cbbSBarry Smith count = 0; 4351eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4361eb62cbbSBarry Smith values = rvalues + i*nmax; 4371eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4381eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4391eb62cbbSBarry Smith } 4401eb62cbbSBarry Smith } 4410452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4420452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4431eb62cbbSBarry Smith 4441eb62cbbSBarry Smith /* actually zap the local rows */ 445537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 446464493b3SBarry Smith PLogObjectParent(A,istmp); 4470452661fSBarry Smith PetscFree(lrows); 44878b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 44978b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 45078b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4511eb62cbbSBarry Smith 4521eb62cbbSBarry Smith /* wait on sends */ 4531eb62cbbSBarry Smith if (nsends) { 4540452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 45578b31e54SBarry Smith CHKPTRQ(send_status); 4561eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4570452661fSBarry Smith PetscFree(send_status); 4581eb62cbbSBarry Smith } 4590452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4601eb62cbbSBarry Smith 4611eb62cbbSBarry Smith return 0; 4621eb62cbbSBarry Smith } 4631eb62cbbSBarry Smith 464416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4651eb62cbbSBarry Smith { 466416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 467fbd6ef76SBarry Smith int ierr,nt; 468416022c9SBarry Smith 469a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 470fbd6ef76SBarry Smith if (nt != a->n) { 47147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 472fbd6ef76SBarry Smith } 473*43a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 47438597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 475*43a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 47638597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4771eb62cbbSBarry Smith return 0; 4781eb62cbbSBarry Smith } 4791eb62cbbSBarry Smith 480416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481da3a660dSBarry Smith { 482416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483da3a660dSBarry Smith int ierr; 484*43a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 48538597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 486*43a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 48738597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 488da3a660dSBarry Smith return 0; 489da3a660dSBarry Smith } 490da3a660dSBarry Smith 491416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 492da3a660dSBarry Smith { 493416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 494da3a660dSBarry Smith int ierr; 495da3a660dSBarry Smith 496da3a660dSBarry Smith /* do nondiagonal part */ 49738597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 498da3a660dSBarry Smith /* send it on its way */ 499537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 500da3a660dSBarry Smith /* do local part */ 50138597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 502da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 503da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 504da3a660dSBarry Smith /* but is not perhaps always true. */ 505537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 506da3a660dSBarry Smith return 0; 507da3a660dSBarry Smith } 508da3a660dSBarry Smith 509416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 510da3a660dSBarry Smith { 511416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512da3a660dSBarry Smith int ierr; 513da3a660dSBarry Smith 514da3a660dSBarry Smith /* do nondiagonal part */ 51538597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 516da3a660dSBarry Smith /* send it on its way */ 517537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 518da3a660dSBarry Smith /* do local part */ 51938597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 520da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 521da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 522da3a660dSBarry Smith /* but is not perhaps always true. */ 5230a198c4cSBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 524da3a660dSBarry Smith return 0; 525da3a660dSBarry Smith } 526da3a660dSBarry Smith 5271eb62cbbSBarry Smith /* 5281eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5291eb62cbbSBarry Smith diagonal block 5301eb62cbbSBarry Smith */ 531416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5321eb62cbbSBarry Smith { 533416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 53444cd7ae7SLois Curfman McInnes if (a->M != a->N) 53544cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 5365baf8537SBarry Smith if (a->rstart != a->cstart || a->rend != a->cend) { 5375baf8537SBarry Smith SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 538416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5391eb62cbbSBarry Smith } 5401eb62cbbSBarry Smith 541052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 542052efed2SBarry Smith { 543052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 544052efed2SBarry Smith int ierr; 545052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 546052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 547052efed2SBarry Smith return 0; 548052efed2SBarry Smith } 549052efed2SBarry Smith 55044a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5511eb62cbbSBarry Smith { 5521eb62cbbSBarry Smith Mat mat = (Mat) obj; 55344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5541eb62cbbSBarry Smith int ierr; 555a5a9c739SBarry Smith #if defined(PETSC_LOG) 5566e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 557a5a9c739SBarry Smith #endif 5580452661fSBarry Smith PetscFree(aij->rowners); 55978b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 56078b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5610452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5620452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5631eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 564a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5657a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5660452661fSBarry Smith PetscFree(aij); 56790f02eecSBarry Smith if (mat->mapping) { 56890f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 56990f02eecSBarry Smith } 570a5a9c739SBarry Smith PLogObjectDestroy(mat); 5710452661fSBarry Smith PetscHeaderDestroy(mat); 5721eb62cbbSBarry Smith return 0; 5731eb62cbbSBarry Smith } 5747857610eSBarry Smith #include "draw.h" 575b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 576ee50ffe9SBarry Smith 577416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5781eb62cbbSBarry Smith { 579416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 580416022c9SBarry Smith int ierr; 581416022c9SBarry Smith 58217699dbbSLois Curfman McInnes if (aij->size == 1) { 583416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 584416022c9SBarry Smith } 585a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 586416022c9SBarry Smith return 0; 587416022c9SBarry Smith } 588416022c9SBarry Smith 589d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 590416022c9SBarry Smith { 59144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 592dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 593a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 594d636dbe3SBarry Smith FILE *fd; 59519bcc07fSBarry Smith ViewerType vtype; 596416022c9SBarry Smith 59719bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 59819bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 59990ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 6000a198c4cSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 6014e220ebcSLois Curfman McInnes MatInfo info; 6024e220ebcSLois Curfman McInnes int flg; 603a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 60490ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6054e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 60695e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 60777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 60895e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 6094e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 61095e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 6114e220ebcSLois Curfman McInnes rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 6124e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 6134e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 6144e220ebcSLois Curfman McInnes ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 6154e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 616a56f8943SBarry Smith fflush(fd); 61777c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 618a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 619416022c9SBarry Smith return 0; 620416022c9SBarry Smith } 6210a198c4cSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 62208480c60SBarry Smith return 0; 62308480c60SBarry Smith } 624416022c9SBarry Smith } 625416022c9SBarry Smith 62619bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 62719bcc07fSBarry Smith Draw draw; 62819bcc07fSBarry Smith PetscTruth isnull; 62919bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 63019bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 63119bcc07fSBarry Smith } 63219bcc07fSBarry Smith 63319bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 63490ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 63577c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 636d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 63717699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6381eb62cbbSBarry Smith aij->cend); 63978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 64078b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 641d13ab20cSBarry Smith fflush(fd); 64277c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 643d13ab20cSBarry Smith } 644416022c9SBarry Smith else { 645a56f8943SBarry Smith int size = aij->size; 646a56f8943SBarry Smith rank = aij->rank; 64717699dbbSLois Curfman McInnes if (size == 1) { 64878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 64995373324SBarry Smith } 65095373324SBarry Smith else { 65195373324SBarry Smith /* assemble the entire matrix onto first processor. */ 65295373324SBarry Smith Mat A; 653ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6542eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 65595373324SBarry Smith Scalar *a; 6562ee70a88SLois Curfman McInnes 65717699dbbSLois Curfman McInnes if (!rank) { 658b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 659c451ab25SLois Curfman McInnes CHKERRQ(ierr); 66095373324SBarry Smith } 66195373324SBarry Smith else { 662b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 663c451ab25SLois Curfman McInnes CHKERRQ(ierr); 66495373324SBarry Smith } 665464493b3SBarry Smith PLogObjectParent(mat,A); 666416022c9SBarry Smith 66795373324SBarry Smith /* copy over the A part */ 668ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6692ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 67095373324SBarry Smith row = aij->rstart; 671dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 67295373324SBarry Smith for ( i=0; i<m; i++ ) { 673416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 67495373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 67595373324SBarry Smith } 6762ee70a88SLois Curfman McInnes aj = Aloc->j; 677dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 67895373324SBarry Smith 67995373324SBarry Smith /* copy over the B part */ 680ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6812ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 68295373324SBarry Smith row = aij->rstart; 6830452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 684dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 68595373324SBarry Smith for ( i=0; i<m; i++ ) { 686416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 68795373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 68895373324SBarry Smith } 6890452661fSBarry Smith PetscFree(ct); 6906d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6916d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 69217699dbbSLois Curfman McInnes if (!rank) { 69378b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 69495373324SBarry Smith } 69578b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 69695373324SBarry Smith } 69795373324SBarry Smith } 6981eb62cbbSBarry Smith return 0; 6991eb62cbbSBarry Smith } 7001eb62cbbSBarry Smith 701416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 702416022c9SBarry Smith { 703416022c9SBarry Smith Mat mat = (Mat) obj; 704416022c9SBarry Smith int ierr; 70519bcc07fSBarry Smith ViewerType vtype; 706416022c9SBarry Smith 70719bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 70819bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 70919bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 710d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 711416022c9SBarry Smith } 71219bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 713416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 714416022c9SBarry Smith } 715416022c9SBarry Smith return 0; 716416022c9SBarry Smith } 717416022c9SBarry Smith 7181eb62cbbSBarry Smith /* 7191eb62cbbSBarry Smith This has to provide several versions. 7201eb62cbbSBarry Smith 7211eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7221eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 723d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7241eb62cbbSBarry Smith */ 725fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 726dbb450caSBarry Smith double fshift,int its,Vec xx) 7278a729477SBarry Smith { 72844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 729d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 730ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 731c16cb8f2SBarry Smith Scalar *b,*x,*xs,*ls,d,*v,sum; 7326abc6512SBarry Smith int ierr,*idx, *diag; 733416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 7348a729477SBarry Smith 735d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 736dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 737dbb450caSBarry Smith ls = ls + shift; 738ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 739d6dfbf8fSBarry Smith diag = A->diag; 740c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 741da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 74238597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 743da3a660dSBarry Smith } 744*43a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 74578b31e54SBarry Smith CHKERRQ(ierr); 746*43a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 74778b31e54SBarry Smith CHKERRQ(ierr); 748d6dfbf8fSBarry Smith while (its--) { 749d6dfbf8fSBarry Smith /* go down through the rows */ 750d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 751d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 752dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 753dbb450caSBarry Smith v = A->a + A->i[i] + shift; 754d6dfbf8fSBarry Smith sum = b[i]; 755d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 756dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 757d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 758dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 759dbb450caSBarry Smith v = B->a + B->i[i] + shift; 760d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 76155a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 762d6dfbf8fSBarry Smith } 763d6dfbf8fSBarry Smith /* come up through the rows */ 764d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 765d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 766dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 767dbb450caSBarry Smith v = A->a + A->i[i] + shift; 768d6dfbf8fSBarry Smith sum = b[i]; 769d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 770dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 771d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 772dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 773dbb450caSBarry Smith v = B->a + B->i[i] + shift; 774d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 77555a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 776d6dfbf8fSBarry Smith } 777d6dfbf8fSBarry Smith } 778d6dfbf8fSBarry Smith } 779d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 780da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 78138597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 782da3a660dSBarry Smith } 783*43a90d84SBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 78478b31e54SBarry Smith CHKERRQ(ierr); 785*43a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 78678b31e54SBarry Smith CHKERRQ(ierr); 787d6dfbf8fSBarry Smith while (its--) { 788d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 789d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 790dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 791dbb450caSBarry Smith v = A->a + A->i[i] + shift; 792d6dfbf8fSBarry Smith sum = b[i]; 793d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 794dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 795d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 796dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 797dbb450caSBarry Smith v = B->a + B->i[i] + shift; 798d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 79955a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 800d6dfbf8fSBarry Smith } 801d6dfbf8fSBarry Smith } 802d6dfbf8fSBarry Smith } 803d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 804da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 80538597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 806da3a660dSBarry Smith } 807*43a90d84SBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 80878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 809*43a90d84SBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 81078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 811d6dfbf8fSBarry Smith while (its--) { 812d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 813d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 814dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 815dbb450caSBarry Smith v = A->a + A->i[i] + shift; 816d6dfbf8fSBarry Smith sum = b[i]; 817d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 818dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 819d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 820dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 821dbb450caSBarry Smith v = B->a + B->i[i] + shift; 822d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 82355a1b374SBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 824d6dfbf8fSBarry Smith } 825d6dfbf8fSBarry Smith } 826d6dfbf8fSBarry Smith } 827c16cb8f2SBarry Smith else { 828c16cb8f2SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 829c16cb8f2SBarry Smith } 8308a729477SBarry Smith return 0; 8318a729477SBarry Smith } 832a66be287SLois Curfman McInnes 8334e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 834a66be287SLois Curfman McInnes { 835a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 836a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 8374e220ebcSLois Curfman McInnes int ierr; 8384e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 839a66be287SLois Curfman McInnes 8404e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 8414e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 8424e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 8434e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 8444e220ebcSLois Curfman McInnes info->block_size = 1.0; 8454e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 8464e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 8474e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 8484e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 8494e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 8504e220ebcSLois Curfman McInnes isend[3] += info->memory; isend[4] += info->mallocs; 851a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 8524e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 8534e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 8544e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 8554e220ebcSLois Curfman McInnes info->memory = isend[3]; 8564e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 857a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 8584e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 8594e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8604e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8614e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8624e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8634e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 864a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 8654e220ebcSLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 8664e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8674e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8684e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8694e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8704e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 871a66be287SLois Curfman McInnes } 8724e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8734e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 8744e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 8754e220ebcSLois Curfman McInnes 876a66be287SLois Curfman McInnes return 0; 877a66be287SLois Curfman McInnes } 878a66be287SLois Curfman McInnes 879299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 880299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 881299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 882299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 883299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 884299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 885299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 886299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 887299609e3SLois Curfman McInnes 888416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 889c74985f6SBarry Smith { 890c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 891c74985f6SBarry Smith 8926d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 8936d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 894b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 895b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 896b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 897b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 898aeafbbfcSLois Curfman McInnes a->roworiented = 1; 899c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 900c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 901b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 9026d4a8577SBarry Smith op == MAT_SYMMETRIC || 9036d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 9046d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 90594a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 9066d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 9074b0e389bSBarry Smith a->roworiented = 0; 9084b0e389bSBarry Smith MatSetOption(a->A,op); 9094b0e389bSBarry Smith MatSetOption(a->B,op); 91090f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 91190f02eecSBarry Smith a->donotstash = 1; 91290f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 9136d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 914c0bbcb79SLois Curfman McInnes else 9154d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 916c74985f6SBarry Smith return 0; 917c74985f6SBarry Smith } 918c74985f6SBarry Smith 919d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 920c74985f6SBarry Smith { 92144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 922c74985f6SBarry Smith *m = mat->M; *n = mat->N; 923c74985f6SBarry Smith return 0; 924c74985f6SBarry Smith } 925c74985f6SBarry Smith 926d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 927c74985f6SBarry Smith { 92844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 929b7c46309SBarry Smith *m = mat->m; *n = mat->N; 930c74985f6SBarry Smith return 0; 931c74985f6SBarry Smith } 932c74985f6SBarry Smith 933d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 934c74985f6SBarry Smith { 93544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 936c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 937c74985f6SBarry Smith return 0; 938c74985f6SBarry Smith } 939c74985f6SBarry Smith 9406d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9416d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 9426d84be18SBarry Smith 9436d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 94439e00950SLois Curfman McInnes { 945154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 94670f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 947154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 948154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 94970f0671dSBarry Smith int *cmap, *idx_p; 95039e00950SLois Curfman McInnes 9517a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 9527a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 9537a0afa10SBarry Smith 95470f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 9557a0afa10SBarry Smith /* 9567a0afa10SBarry Smith allocate enough space to hold information from the longest row. 9577a0afa10SBarry Smith */ 9587a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 959c16cb8f2SBarry Smith int max = 1,m = mat->m,tmp; 960c16cb8f2SBarry Smith for ( i=0; i<m; i++ ) { 9617a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 9627a0afa10SBarry Smith if (max < tmp) { max = tmp; } 9637a0afa10SBarry Smith } 9647a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 9657a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 9667a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 9677a0afa10SBarry Smith } 9687a0afa10SBarry Smith 969b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 970abc0e9e4SLois Curfman McInnes lrow = row - rstart; 97139e00950SLois Curfman McInnes 972154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 973154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 974154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 97538597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 97638597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 977154123eaSLois Curfman McInnes nztot = nzA + nzB; 978154123eaSLois Curfman McInnes 97970f0671dSBarry Smith cmap = mat->garray; 980154123eaSLois Curfman McInnes if (v || idx) { 981154123eaSLois Curfman McInnes if (nztot) { 982154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 98370f0671dSBarry Smith int imark = -1; 984154123eaSLois Curfman McInnes if (v) { 98570f0671dSBarry Smith *v = v_p = mat->rowvalues; 98639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 98770f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 988154123eaSLois Curfman McInnes else break; 989154123eaSLois Curfman McInnes } 990154123eaSLois Curfman McInnes imark = i; 99170f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 99270f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 993154123eaSLois Curfman McInnes } 994154123eaSLois Curfman McInnes if (idx) { 99570f0671dSBarry Smith *idx = idx_p = mat->rowindices; 99670f0671dSBarry Smith if (imark > -1) { 99770f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 99870f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 99970f0671dSBarry Smith } 100070f0671dSBarry Smith } else { 1001154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 100270f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1003154123eaSLois Curfman McInnes else break; 1004154123eaSLois Curfman McInnes } 1005154123eaSLois Curfman McInnes imark = i; 100670f0671dSBarry Smith } 100770f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 100870f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 100939e00950SLois Curfman McInnes } 101039e00950SLois Curfman McInnes } 10111ca473b0SSatish Balay else { 10121ca473b0SSatish Balay if (idx) *idx = 0; 10131ca473b0SSatish Balay if (v) *v = 0; 10141ca473b0SSatish Balay } 1015154123eaSLois Curfman McInnes } 101639e00950SLois Curfman McInnes *nz = nztot; 101738597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 101838597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 101939e00950SLois Curfman McInnes return 0; 102039e00950SLois Curfman McInnes } 102139e00950SLois Curfman McInnes 10226d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 102339e00950SLois Curfman McInnes { 10247a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 10257a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 10267a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 10277a0afa10SBarry Smith } 10287a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 102939e00950SLois Curfman McInnes return 0; 103039e00950SLois Curfman McInnes } 103139e00950SLois Curfman McInnes 1032cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1033855ac2c5SLois Curfman McInnes { 1034855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1035ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1036416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1037416022c9SBarry Smith double sum = 0.0; 103804ca555eSLois Curfman McInnes Scalar *v; 103904ca555eSLois Curfman McInnes 104017699dbbSLois Curfman McInnes if (aij->size == 1) { 104114183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 104237fa93a5SLois Curfman McInnes } else { 104304ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 104404ca555eSLois Curfman McInnes v = amat->a; 104504ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 104604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 104704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 104804ca555eSLois Curfman McInnes #else 104904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 105004ca555eSLois Curfman McInnes #endif 105104ca555eSLois Curfman McInnes } 105204ca555eSLois Curfman McInnes v = bmat->a; 105304ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 105404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 105504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 105604ca555eSLois Curfman McInnes #else 105704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 105804ca555eSLois Curfman McInnes #endif 105904ca555eSLois Curfman McInnes } 10606d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 106104ca555eSLois Curfman McInnes *norm = sqrt(*norm); 106204ca555eSLois Curfman McInnes } 106304ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 106404ca555eSLois Curfman McInnes double *tmp, *tmp2; 106504ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 10660452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 10670452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1068cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 106904ca555eSLois Curfman McInnes *norm = 0.0; 107004ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 107104ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1072579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 107304ca555eSLois Curfman McInnes } 107404ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 107504ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1076579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 107704ca555eSLois Curfman McInnes } 10786d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 107904ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 108004ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 108104ca555eSLois Curfman McInnes } 10820452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 108304ca555eSLois Curfman McInnes } 108404ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1085515d9167SLois Curfman McInnes double ntemp = 0.0; 108604ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1087dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 108804ca555eSLois Curfman McInnes sum = 0.0; 108904ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1090cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 109104ca555eSLois Curfman McInnes } 1092dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 109304ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1094cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 109504ca555eSLois Curfman McInnes } 1096515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 109704ca555eSLois Curfman McInnes } 10986d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 109904ca555eSLois Curfman McInnes } 110004ca555eSLois Curfman McInnes else { 1101515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 110204ca555eSLois Curfman McInnes } 110337fa93a5SLois Curfman McInnes } 1104855ac2c5SLois Curfman McInnes return 0; 1105855ac2c5SLois Curfman McInnes } 1106855ac2c5SLois Curfman McInnes 11070de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1108b7c46309SBarry Smith { 1109b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1110dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1111416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1112416022c9SBarry Smith Mat B; 1113b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1114b7c46309SBarry Smith Scalar *array; 1115b7c46309SBarry Smith 11163638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 11173638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1118b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1119b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1120b7c46309SBarry Smith 1121b7c46309SBarry Smith /* copy over the A part */ 1122ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1123b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1124b7c46309SBarry Smith row = a->rstart; 1125dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1126b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1127416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1128b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1129b7c46309SBarry Smith } 1130b7c46309SBarry Smith aj = Aloc->j; 11314af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1132b7c46309SBarry Smith 1133b7c46309SBarry Smith /* copy over the B part */ 1134ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1135b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1136b7c46309SBarry Smith row = a->rstart; 11370452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1138dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1139b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1140416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1141b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1142b7c46309SBarry Smith } 11430452661fSBarry Smith PetscFree(ct); 11446d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11456d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11463638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 11470de55854SLois Curfman McInnes *matout = B; 11480de55854SLois Curfman McInnes } else { 11490de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 11500452661fSBarry Smith PetscFree(a->rowners); 11510de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 11520de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 11530452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 11540452661fSBarry Smith if (a->garray) PetscFree(a->garray); 11550de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1156a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 11570452661fSBarry Smith PetscFree(a); 1158416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 11590452661fSBarry Smith PetscHeaderDestroy(B); 11600de55854SLois Curfman McInnes } 1161b7c46309SBarry Smith return 0; 1162b7c46309SBarry Smith } 1163b7c46309SBarry Smith 11644b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1165a008b906SSatish Balay { 11664b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 11674b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 1168a008b906SSatish Balay int ierr,s1,s2,s3; 1169a008b906SSatish Balay 11704b967eb1SSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 11714b967eb1SSatish Balay if (rr) { 11724b967eb1SSatish Balay s3 = aij->n; 11734b967eb1SSatish Balay VecGetLocalSize_Fast(rr,s1); 11744b967eb1SSatish Balay if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size"); 11754b967eb1SSatish Balay /* Overlap communication with computation. */ 1176*43a90d84SBarry Smith ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1177a008b906SSatish Balay } 11784b967eb1SSatish Balay if (ll) { 11794b967eb1SSatish Balay VecGetLocalSize_Fast(ll,s1); 11804b967eb1SSatish Balay if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size"); 1181c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 11824b967eb1SSatish Balay } 11834b967eb1SSatish Balay /* scale the diagonal block */ 1184c351683dSSatish Balay ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 11854b967eb1SSatish Balay 11864b967eb1SSatish Balay if (rr) { 11874b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1188*43a90d84SBarry Smith ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1189c351683dSSatish Balay ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 11904b967eb1SSatish Balay } 11914b967eb1SSatish Balay 1192a008b906SSatish Balay return 0; 1193a008b906SSatish Balay } 1194a008b906SSatish Balay 1195a008b906SSatish Balay 1196682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1197682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1198682d7d0cSBarry Smith { 1199682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1200682d7d0cSBarry Smith 1201682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1202682d7d0cSBarry Smith else return 0; 1203682d7d0cSBarry Smith } 1204682d7d0cSBarry Smith 12055a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 12065a838052SSatish Balay { 12075a838052SSatish Balay *bs = 1; 12085a838052SSatish Balay return 0; 12095a838052SSatish Balay } 12105a838052SSatish Balay 1211fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12123d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12132f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 12140a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 12150a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 12168a729477SBarry Smith /* -------------------------------------------------------------------*/ 12172ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 121839e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 121944a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 122044a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 122136ce4990SBarry Smith 0,0, 122236ce4990SBarry Smith 0,0, 122336ce4990SBarry Smith 0,0, 122444a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1225b7c46309SBarry Smith MatTranspose_MPIAIJ, 1226a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1227a008b906SSatish Balay MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1228ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12291eb62cbbSBarry Smith 0, 1230299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 123136ce4990SBarry Smith 0,0,0,0, 1232d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 123336ce4990SBarry Smith 0,0, 12343e85bedfSBarry Smith 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1235b49de8d1SLois Curfman McInnes 0,0,0, 1236598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1237052efed2SBarry Smith MatPrintHelp_MPIAIJ, 12383b2fbd54SBarry Smith MatScale_MPIAIJ,0,0,0, 12390a198c4cSBarry Smith MatGetBlockSize_MPIAIJ,0,0,0,0, 12400a198c4cSBarry Smith MatFDColoringCreate_MPIAIJ}; 124136ce4990SBarry Smith 12428a729477SBarry Smith 12431987afe7SBarry Smith /*@C 1244ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 12453a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 12463a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 12473a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 12483a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 12498a729477SBarry Smith 12508a729477SBarry Smith Input Parameters: 12511eb62cbbSBarry Smith . comm - MPI communicator 12527d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 125392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 125492e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 125592e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 125692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 125792e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 12587d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 125992e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1260ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1261ff756334SLois Curfman McInnes (same for all local rows) 12622bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 126392e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 12642bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 12652bd5e0b2SLois Curfman McInnes it is zero. 12662bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1267ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 12682bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 12692bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 12702bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 12718a729477SBarry Smith 1272ff756334SLois Curfman McInnes Output Parameter: 127344cd7ae7SLois Curfman McInnes . A - the matrix 12748a729477SBarry Smith 1275ff756334SLois Curfman McInnes Notes: 1276ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1277ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 12780002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 12790002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1280ff756334SLois Curfman McInnes 1281ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1282ff756334SLois Curfman McInnes (possibly both). 1283ff756334SLois Curfman McInnes 12845511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 12855511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 12865511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 12875511cfe3SLois Curfman McInnes 12885511cfe3SLois Curfman McInnes Options Database Keys: 12895511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 12906e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 12916e62573dSLois Curfman McInnes $ (max limit=5) 12926e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 12936e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 12946e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 12955511cfe3SLois Curfman McInnes 1296e0245417SLois Curfman McInnes Storage Information: 1297e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1298e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1299e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1300e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1301e0245417SLois Curfman McInnes 1302e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13035ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13045ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 13055ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 13065ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1307ff756334SLois Curfman McInnes 13085511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 13095511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 13102191d07cSBarry Smith 1311b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1312b810aeb4SBarry Smith $ ------------------- 13135511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 13145511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 13155511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 13165511cfe3SLois Curfman McInnes $ ------------------- 1317b810aeb4SBarry Smith $ 1318b810aeb4SBarry Smith 13195511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 13205511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 13215511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 13225511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 13235511cfe3SLois Curfman McInnes 13245511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 13255511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 13265511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 13273d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 132892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 13293d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 13303a511b96SLois Curfman McInnes 1331dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1332ff756334SLois Curfman McInnes 1333fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13348a729477SBarry Smith @*/ 13351eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 133644cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 13378a729477SBarry Smith { 133844cd7ae7SLois Curfman McInnes Mat B; 133944cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 134036ce4990SBarry Smith int ierr, i,sum[2],work[2],size; 1341416022c9SBarry Smith 134244cd7ae7SLois Curfman McInnes *A = 0; 134344cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 134444cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 134544cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 134644cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 134744cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 134836ce4990SBarry Smith MPI_Comm_size(comm,&size); 134936ce4990SBarry Smith if (size == 1) { 135036ce4990SBarry Smith B->ops.getrowij = MatGetRowIJ_MPIAIJ; 135136ce4990SBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 135236ce4990SBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 135336ce4990SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 135436ce4990SBarry Smith B->ops.lufactor = MatLUFactor_MPIAIJ; 135536ce4990SBarry Smith B->ops.solve = MatSolve_MPIAIJ; 135636ce4990SBarry Smith B->ops.solveadd = MatSolveAdd_MPIAIJ; 135736ce4990SBarry Smith B->ops.solvetrans = MatSolveTrans_MPIAIJ; 135836ce4990SBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 135936ce4990SBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 136036ce4990SBarry Smith } 136144cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 136244cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 136344cd7ae7SLois Curfman McInnes B->factor = 0; 136444cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 136590f02eecSBarry Smith B->mapping = 0; 1366d6dfbf8fSBarry Smith 136744cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 136844cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 136944cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 13701eb62cbbSBarry Smith 1371b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 13722e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 13731987afe7SBarry Smith 1374dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13751eb62cbbSBarry Smith work[0] = m; work[1] = n; 1376d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1377dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1378dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13791eb62cbbSBarry Smith } 138044cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 138144cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 138244cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 138344cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 138444cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 138544cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 13861eb62cbbSBarry Smith 13871eb62cbbSBarry Smith /* build local table of row and column ownerships */ 138844cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 138944cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1390603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 139144cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 139244cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 139344cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 139444cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 13958a729477SBarry Smith } 139644cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 139744cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 139844cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 139944cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 140044cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 140144cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 14028a729477SBarry Smith } 140344cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 140444cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 14058a729477SBarry Smith 14065ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 140744cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 140844cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 14097b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 141044cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 141144cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 14128a729477SBarry Smith 14131eb62cbbSBarry Smith /* build cache for off array entries formed */ 141444cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 141590f02eecSBarry Smith b->donotstash = 0; 141644cd7ae7SLois Curfman McInnes b->colmap = 0; 141744cd7ae7SLois Curfman McInnes b->garray = 0; 141844cd7ae7SLois Curfman McInnes b->roworiented = 1; 14198a729477SBarry Smith 14201eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 142144cd7ae7SLois Curfman McInnes b->lvec = 0; 142244cd7ae7SLois Curfman McInnes b->Mvctx = 0; 14238a729477SBarry Smith 14247a0afa10SBarry Smith /* stuff for MatGetRow() */ 142544cd7ae7SLois Curfman McInnes b->rowindices = 0; 142644cd7ae7SLois Curfman McInnes b->rowvalues = 0; 142744cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 14287a0afa10SBarry Smith 142944cd7ae7SLois Curfman McInnes *A = B; 1430d6dfbf8fSBarry Smith return 0; 1431d6dfbf8fSBarry Smith } 1432c74985f6SBarry Smith 14333d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1434d6dfbf8fSBarry Smith { 1435d6dfbf8fSBarry Smith Mat mat; 1436416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1437a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1438d6dfbf8fSBarry Smith 1439416022c9SBarry Smith *newmat = 0; 14400452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1441a5a9c739SBarry Smith PLogObjectCreate(mat); 14420452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1443416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 144444a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 144544a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1446d6dfbf8fSBarry Smith mat->factor = matin->factor; 1447c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1448d6dfbf8fSBarry Smith 144944cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 145044cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 145144cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 145244cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1453d6dfbf8fSBarry Smith 1454416022c9SBarry Smith a->rstart = oldmat->rstart; 1455416022c9SBarry Smith a->rend = oldmat->rend; 1456416022c9SBarry Smith a->cstart = oldmat->cstart; 1457416022c9SBarry Smith a->cend = oldmat->cend; 145817699dbbSLois Curfman McInnes a->size = oldmat->size; 145917699dbbSLois Curfman McInnes a->rank = oldmat->rank; 146064119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1461bcd2baecSBarry Smith a->rowvalues = 0; 1462bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1463d6dfbf8fSBarry Smith 1464603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1465603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1466603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1467603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1468416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14692ee70a88SLois Curfman McInnes if (oldmat->colmap) { 14700452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1471416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1472416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1473416022c9SBarry Smith } else a->colmap = 0; 14743f41c07dSBarry Smith if (oldmat->garray) { 14753f41c07dSBarry Smith len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 14763f41c07dSBarry Smith a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1477464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 14783f41c07dSBarry Smith if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1479416022c9SBarry Smith } else a->garray = 0; 1480d6dfbf8fSBarry Smith 1481416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1482416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1483a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1484416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1485416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1486416022c9SBarry Smith PLogObjectParent(mat,a->A); 1487416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1488416022c9SBarry Smith PLogObjectParent(mat,a->B); 14895dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 149025cdf11fSBarry Smith if (flg) { 1491682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1492682d7d0cSBarry Smith } 14938a729477SBarry Smith *newmat = mat; 14948a729477SBarry Smith return 0; 14958a729477SBarry Smith } 1496416022c9SBarry Smith 149777c4ece6SBarry Smith #include "sys.h" 1498416022c9SBarry Smith 149919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1500416022c9SBarry Smith { 1501d65a2f8fSBarry Smith Mat A; 1502416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1503d65a2f8fSBarry Smith Scalar *vals,*svals; 150419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1505416022c9SBarry Smith MPI_Status status; 150617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1507d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 150819bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1509416022c9SBarry Smith 151017699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 151117699dbbSLois Curfman McInnes if (!rank) { 151290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 151377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1514c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1515416022c9SBarry Smith } 1516416022c9SBarry Smith 1517416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1518416022c9SBarry Smith M = header[1]; N = header[2]; 1519416022c9SBarry Smith /* determine ownership of all rows */ 152017699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15210452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1522416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1523416022c9SBarry Smith rowners[0] = 0; 152417699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1525416022c9SBarry Smith rowners[i] += rowners[i-1]; 1526416022c9SBarry Smith } 152717699dbbSLois Curfman McInnes rstart = rowners[rank]; 152817699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1529416022c9SBarry Smith 1530416022c9SBarry Smith /* distribute row lengths to all processors */ 15310452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1532416022c9SBarry Smith offlens = ourlens + (rend-rstart); 153317699dbbSLois Curfman McInnes if (!rank) { 15340452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 153577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 15360452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 153717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1538d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15390452661fSBarry Smith PetscFree(sndcounts); 1540416022c9SBarry Smith } 1541416022c9SBarry Smith else { 1542416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1543416022c9SBarry Smith } 1544416022c9SBarry Smith 154517699dbbSLois Curfman McInnes if (!rank) { 1546416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15470452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1548cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 154917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1550416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1551416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1552416022c9SBarry Smith } 1553416022c9SBarry Smith } 15540452661fSBarry Smith PetscFree(rowlengths); 1555416022c9SBarry Smith 1556416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1557416022c9SBarry Smith maxnz = 0; 155817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15590452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1560416022c9SBarry Smith } 15610452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1562416022c9SBarry Smith 1563416022c9SBarry Smith /* read in my part of the matrix column indices */ 1564416022c9SBarry Smith nz = procsnz[0]; 15650452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 156677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1567d65a2f8fSBarry Smith 1568d65a2f8fSBarry Smith /* read in every one elses and ship off */ 156917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1570d65a2f8fSBarry Smith nz = procsnz[i]; 157177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1572d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1573d65a2f8fSBarry Smith } 15740452661fSBarry Smith PetscFree(cols); 1575416022c9SBarry Smith } 1576416022c9SBarry Smith else { 1577416022c9SBarry Smith /* determine buffer space needed for message */ 1578416022c9SBarry Smith nz = 0; 1579416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1580416022c9SBarry Smith nz += ourlens[i]; 1581416022c9SBarry Smith } 15820452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1583416022c9SBarry Smith 1584416022c9SBarry Smith /* receive message of column indices*/ 1585d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1586416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1587c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1588416022c9SBarry Smith } 1589416022c9SBarry Smith 1590416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1591cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1592416022c9SBarry Smith jj = 0; 1593416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1594416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1595d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1596416022c9SBarry Smith jj++; 1597416022c9SBarry Smith } 1598416022c9SBarry Smith } 1599d65a2f8fSBarry Smith 1600d65a2f8fSBarry Smith /* create our matrix */ 1601416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1602416022c9SBarry Smith ourlens[i] -= offlens[i]; 1603416022c9SBarry Smith } 1604d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1605d65a2f8fSBarry Smith A = *newmat; 16066d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1607d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1608d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1609d65a2f8fSBarry Smith } 1610416022c9SBarry Smith 161117699dbbSLois Curfman McInnes if (!rank) { 16120452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1613416022c9SBarry Smith 1614416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1615416022c9SBarry Smith nz = procsnz[0]; 161677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1617d65a2f8fSBarry Smith 1618d65a2f8fSBarry Smith /* insert into matrix */ 1619d65a2f8fSBarry Smith jj = rstart; 1620d65a2f8fSBarry Smith smycols = mycols; 1621d65a2f8fSBarry Smith svals = vals; 1622d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1623d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1624d65a2f8fSBarry Smith smycols += ourlens[i]; 1625d65a2f8fSBarry Smith svals += ourlens[i]; 1626d65a2f8fSBarry Smith jj++; 1627416022c9SBarry Smith } 1628416022c9SBarry Smith 1629d65a2f8fSBarry Smith /* read in other processors and ship out */ 163017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1631416022c9SBarry Smith nz = procsnz[i]; 163277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1633d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1634416022c9SBarry Smith } 16350452661fSBarry Smith PetscFree(procsnz); 1636416022c9SBarry Smith } 1637d65a2f8fSBarry Smith else { 1638d65a2f8fSBarry Smith /* receive numeric values */ 16390452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1640416022c9SBarry Smith 1641d65a2f8fSBarry Smith /* receive message of values*/ 1642d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1643d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1644c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1645d65a2f8fSBarry Smith 1646d65a2f8fSBarry Smith /* insert into matrix */ 1647d65a2f8fSBarry Smith jj = rstart; 1648d65a2f8fSBarry Smith smycols = mycols; 1649d65a2f8fSBarry Smith svals = vals; 1650d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1651d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1652d65a2f8fSBarry Smith smycols += ourlens[i]; 1653d65a2f8fSBarry Smith svals += ourlens[i]; 1654d65a2f8fSBarry Smith jj++; 1655d65a2f8fSBarry Smith } 1656d65a2f8fSBarry Smith } 16570452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1658d65a2f8fSBarry Smith 16596d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 16606d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1661416022c9SBarry Smith return 0; 1662416022c9SBarry Smith } 1663